2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
19 package jalview.datamodel;
\r
21 import jalview.analysis.*;
\r
23 import jalview.util.*;
\r
27 /** Data structure to hold and manipulate a multiple sequence alignment
\r
29 public class Alignment implements AlignmentI
\r
31 protected Alignment dataset;
\r
32 protected Vector sequences;
\r
33 protected Vector groups = new Vector();
\r
34 protected Vector superGroup = new Vector();
\r
35 protected char gapCharacter = '-';
\r
36 protected int type = NUCLEOTIDE;
\r
37 public static final int PROTEIN = 0;
\r
38 public static final int NUCLEOTIDE = 1;
\r
40 /** DOCUMENT ME!! */
\r
41 public AlignmentAnnotation[] annotations;
\r
43 HiddenSequences hiddenSequences = new HiddenSequences(this);
\r
46 /** Make an alignment from an array of Sequences.
\r
50 public Alignment(SequenceI[] seqs)
\r
54 if( jalview.util.Comparison.isNucleotide(seqs))
\r
59 sequences = new Vector();
\r
61 for (i = 0; i < seqs.length; i++)
\r
63 sequences.addElement(seqs[i]);
\r
65 if(seqs[i].getDatasetSequence()!=null
\r
66 && seqs[i].getDatasetSequence().getAnnotation()!=null)
\r
69 for(int a=0; a<seqs[i].getDatasetSequence().getAnnotation().length; a++)
\r
71 this.addAnnotation(seqs[i].getDatasetSequence().getAnnotation()[a], seqs[i]);
\r
82 * @return DOCUMENT ME!
\r
84 public Vector getSequences()
\r
92 * @param i DOCUMENT ME!
\r
94 * @return DOCUMENT ME!
\r
96 public SequenceI getSequenceAt(int i)
\r
98 if (i < sequences.size())
\r
100 return (SequenceI) sequences.elementAt(i);
\r
106 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
110 public void addSequence(SequenceI snew)
\r
112 sequences.addElement(snew);
\r
118 * @param seq DOCUMENT ME!
\r
120 public void addSequence(SequenceI[] seq)
\r
122 for (int i = 0; i < seq.length; i++)
\r
124 addSequence(seq[i]);
\r
128 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
132 public void setSequenceAt(int i, SequenceI snew)
\r
134 SequenceI oldseq = getSequenceAt(i);
\r
135 deleteSequence(oldseq);
\r
137 sequences.setElementAt(snew, i);
\r
143 * @return DOCUMENT ME!
\r
145 public Vector getGroups()
\r
150 /** Takes out columns consisting entirely of gaps (-,.," ")
\r
152 public void removeGaps()
\r
155 int iSize = getWidth();
\r
157 for (int i = 0; i < iSize; i++)
\r
159 boolean delete = true;
\r
161 for (int j = 0; j < getHeight(); j++)
\r
163 current = getSequenceAt(j);
\r
165 if (current.getLength() > i)
\r
167 /* MC Should move this to a method somewhere */
\r
168 if (!jalview.util.Comparison.isGap(current.getCharAt(i)))
\r
177 deleteColumns(i, i);
\r
184 /** Removes a range of columns (start to end inclusive).
\r
186 * @param start Start column in the alignment
\r
187 * @param end End column in the alignment
\r
189 public void deleteColumns(int start, int end)
\r
191 deleteColumns(0, getHeight() - 1, start, end);
\r
197 * @param seq1 DOCUMENT ME!
\r
198 * @param seq2 DOCUMENT ME!
\r
199 * @param start DOCUMENT ME!
\r
200 * @param end DOCUMENT ME!
\r
202 public void deleteColumns(int seq1, int seq2, int start, int end)
\r
204 for (int i = 0; i <= (end - start); i++)
\r
206 for (int j = seq1; j <= seq2; j++)
\r
208 getSequenceAt(j).deleteCharAt(start);
\r
216 * @param i DOCUMENT ME!
\r
218 public void trimLeft(int i)
\r
220 int j, jSize = getHeight();
\r
221 for (j = 0; j < jSize; j++)
\r
223 SequenceI s = getSequenceAt(j);
\r
224 int newstart = s.findPosition(i);
\r
226 if(i>s.getLength())
\r
228 sequences.removeElement(s);
\r
234 s.setStart(newstart);
\r
235 s.setSequence(s.getSequence().substring(i));
\r
243 * @param i DOCUMENT ME!
\r
245 public void trimRight(int i)
\r
247 for (int j = 0; j < getHeight(); j++)
\r
249 SequenceI s = getSequenceAt(j);
\r
250 int newend = s.findPosition(i);
\r
253 if(s.getLength()>i)
\r
254 s.setSequence(s.getSequence().substring(0, i + 1));
\r
261 * @param s DOCUMENT ME!
\r
263 public void deleteSequence(SequenceI s)
\r
265 for (int i = 0; i < getHeight(); i++)
\r
267 if (getSequenceAt(i) == s)
\r
277 * @param i DOCUMENT ME!
\r
279 public void deleteSequence(int i)
\r
281 sequences.removeElementAt(i);
\r
287 * @param threshold DOCUMENT ME!
\r
288 * @param sel DOCUMENT ME!
\r
290 * @return DOCUMENT ME!
\r
292 public Vector removeRedundancy(float threshold, Vector sel)
\r
294 Vector del = new Vector();
\r
296 for (int i = 1; i < sel.size(); i++)
\r
298 for (int j = 0; j < i; j++)
\r
300 // Only do the comparison if either have not been deleted
\r
301 if (!del.contains((SequenceI) sel.elementAt(i)) ||
\r
302 !del.contains((SequenceI) sel.elementAt(j)))
\r
304 // use PID instead of Comparison (which is really not pleasant)
\r
305 float pid = Comparison.PID((SequenceI) sel.elementAt(j),
\r
306 (SequenceI) sel.elementAt(i));
\r
308 if (pid >= threshold)
\r
310 // Delete the shortest one
\r
311 if (((SequenceI) sel.elementAt(j)).getSequence().length() > ((SequenceI) sel
\r
313 i)).getSequence().length())
\r
315 del.addElement(sel.elementAt(i));
\r
319 del.addElement(sel.elementAt(i));
\r
326 // Now delete the sequences
\r
327 for (int i = 0; i < del.size(); i++)
\r
329 deleteSequence((SequenceI) del.elementAt(i));
\r
336 public SequenceGroup findGroup(int i)
\r
338 return findGroup(getSequenceAt(i));
\r
342 public SequenceGroup findGroup(SequenceI s)
\r
344 for (int i = 0; i < this.groups.size(); i++)
\r
346 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
\r
348 if (sg.sequences.contains(s))
\r
360 * @param s DOCUMENT ME!
\r
362 * @return DOCUMENT ME!
\r
364 public SequenceGroup[] findAllGroups(SequenceI s)
\r
366 Vector temp = new Vector();
\r
368 int gSize = groups.size();
\r
369 for (int i = 0; i < gSize; i++)
\r
371 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
\r
372 if(sg==null || sg.sequences==null)
\r
374 this.deleteGroup(sg);
\r
379 if (sg.sequences.contains(s))
\r
381 temp.addElement(sg);
\r
385 SequenceGroup[] ret = new SequenceGroup[temp.size()];
\r
387 for (int i = 0; i < temp.size(); i++)
\r
389 ret[i] = (SequenceGroup) temp.elementAt(i);
\r
398 public void addGroup(SequenceGroup sg)
\r
400 if (!groups.contains(sg))
\r
402 groups.addElement(sg);
\r
409 public void deleteAllGroups()
\r
411 groups.removeAllElements();
\r
412 superGroup.removeAllElements();
\r
416 while (i < sequences.size())
\r
418 SequenceI s = getSequenceAt(i);
\r
419 s.setColor(java.awt.Color.white);
\r
425 public void deleteGroup(SequenceGroup g)
\r
427 if (groups.contains(g))
\r
429 groups.removeElement(g);
\r
434 public SequenceI findName(String name)
\r
438 while (i < sequences.size())
\r
440 if (getSequenceAt(i).getName().equals(name))
\r
442 return getSequenceAt(i);
\r
453 public int findIndex(SequenceI s)
\r
457 while (i < sequences.size())
\r
459 if (s == getSequenceAt(i))
\r
473 * @return DOCUMENT ME!
\r
475 public int getHeight()
\r
477 return sequences.size();
\r
483 * @return DOCUMENT ME!
\r
485 public int getWidth()
\r
487 int maxLength = -1;
\r
489 for (int i = 0; i < sequences.size(); i++)
\r
491 if (getSequenceAt(i).getLength() > maxLength)
\r
493 maxLength = getSequenceAt(i).getLength();
\r
503 * @return DOCUMENT ME!
\r
505 public int getMaxIdLength()
\r
510 while (i < sequences.size())
\r
512 SequenceI seq = getSequenceAt(i);
\r
513 String tmp = seq.getName() + "/" + seq.getStart() + "-" +
\r
516 if (tmp.length() > max)
\r
518 max = tmp.length();
\r
530 * @param gc DOCUMENT ME!
\r
532 public void setGapCharacter(char gc)
\r
536 for (int i = 0; i < sequences.size(); i++)
\r
538 Sequence seq = (Sequence) sequences.elementAt(i);
\r
539 seq.sequence = seq.sequence.replace('.', gc);
\r
540 seq.sequence = seq.sequence.replace('-', gc);
\r
541 seq.sequence = seq.sequence.replace(' ', gc);
\r
548 * @return DOCUMENT ME!
\r
550 public char getGapCharacter()
\r
552 return gapCharacter;
\r
558 * @return DOCUMENT ME!
\r
560 public Vector getAAFrequency()
\r
562 return AAFrequency.calculate(sequences, 0, getWidth());
\r
568 * @return DOCUMENT ME!
\r
570 public boolean isAligned()
\r
572 int width = getWidth();
\r
574 for (int i = 0; i < sequences.size(); i++)
\r
576 if (getSequenceAt(i).getLength() != width)
\r
588 * @param aa DOCUMENT ME!
\r
590 public void deleteAnnotation(AlignmentAnnotation aa)
\r
594 if (annotations != null)
\r
596 aSize = annotations.length;
\r
599 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
\r
603 for (int i = 0; i < aSize; i++)
\r
605 if (annotations[i] == aa)
\r
610 temp[tIndex] = annotations[i];
\r
614 annotations = temp;
\r
619 * @param aa AlignmentAnnotation
\r
620 * @param seqRef The sequence to associate this annotation with
\r
621 * @return The adjusted AlignmentAnnotation, with dataset sequence and annotation added
\r
623 public AlignmentAnnotation addAnnotation(AlignmentAnnotation aa, SequenceI seqRef)
\r
627 //We can only add Annotations to the dataset sequences
\r
628 if(seqRef.getDatasetSequence()==null)
\r
633 AlignmentAnnotation [] old = seqRef.getDatasetSequence().getAnnotation();
\r
635 //First check if this is a new annotation or not. If it is new,
\r
636 //we must add the annotation to the dataset
\r
637 boolean newAnnotation = true;
\r
638 if(seqRef.getDatasetSequence().getAnnotation()!=null)
\r
640 for(int a=0; a<old.length; a++)
\r
645 newAnnotation = false;
\r
653 seqRef.getDatasetSequence().addAlignmentAnnotation(aa);
\r
656 AlignmentAnnotation copy = null;
\r
658 copy = new AlignmentAnnotation(
\r
659 aa.label, aa.description, aa.annotations, aa.graphMin,
\r
660 aa.graphMax, aa.graph
\r
663 copy = new AlignmentAnnotation(
\r
664 aa.label, aa.description, aa.annotations
\r
667 copy.datasetAnnotation = aa;
\r
669 addAnnotation(copy);
\r
671 copy.sequenceRef = seqRef;
\r
682 public void adjustSequenceAnnotations()
\r
684 if(annotations!=null)
\r
686 for (int a = 0; a < annotations.length; a++)
\r
688 if (annotations[a].sequenceRef != null)
\r
690 annotations[a].adjustForAlignment();
\r
699 * @param aa DOCUMENT ME!
\r
701 public void addAnnotation(AlignmentAnnotation aa)
\r
704 if (annotations != null)
\r
706 aSize = annotations.length + 1;
\r
709 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
\r
711 temp[aSize-1] = aa;
\r
717 for (i = 0; i < (aSize-1); i++)
\r
719 temp[i] = annotations[i];
\r
723 annotations = temp;
\r
726 public void setAnnotationIndex(AlignmentAnnotation aa, int index)
\r
728 if(aa==null || annotations==null || annotations.length-1<index)
\r
731 int aSize = annotations.length;
\r
732 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
\r
736 for (int i = 0; i < aSize; i++)
\r
742 temp[i] = annotations[i];
\r
744 temp[i] = annotations[i-1];
\r
747 annotations = temp;
\r
753 * @return DOCUMENT ME!
\r
755 public AlignmentAnnotation[] getAlignmentAnnotation()
\r
757 return annotations;
\r
760 public void setNucleotide(boolean b)
\r
768 public boolean isNucleotide()
\r
770 if(type==NUCLEOTIDE)
\r
776 public void setDataset(Alignment data)
\r
778 if(dataset==null && data==null)
\r
780 // Create a new dataset for this alignment.
\r
781 // Can only be done once, if dataset is not null
\r
782 // This will not be performed
\r
783 Sequence[] seqs = new Sequence[getHeight()];
\r
784 for (int i = 0; i < getHeight(); i++)
\r
787 seqs[i] = new Sequence(getSequenceAt(i).getName(),
\r
788 AlignSeq.extractGaps(
\r
789 jalview.util.Comparison.GapChars,
\r
790 getSequenceAt(i).getSequence()
\r
792 getSequenceAt(i).getStart(),
\r
793 getSequenceAt(i).getEnd());
\r
795 getSequenceAt(i).setDatasetSequence(seqs[i]);
\r
798 dataset = new Alignment(seqs);
\r
800 else if(dataset==null && data!=null)
\r
806 public Alignment getDataset()
\r
811 public boolean padGaps() {
\r
812 boolean modified=false;
\r
813 int Width = getWidth();
\r
815 for (int i = 0; i < sequences.size();
\r
818 current = getSequenceAt(i);
\r
820 if (current.getLength() < Width)
\r
822 current.insertCharAt(Width - 1, gapCharacter);
\r
829 public HiddenSequences getHiddenSequences()
\r
831 return hiddenSequences;
\r