/*
 * Jalview - A Sequence Alignment Editor and Viewer
 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
 */
\r
20 package jalview.datamodel;
\r
22 import jalview.analysis.*;
\r
23 import jalview.util.*;
\r
/** Data structure to hold and manipulate a multiple sequence alignment. */
\r
28 public class Alignment implements AlignmentI
\r
31 protected Vector sequences;
\r
32 protected Vector groups = new Vector();
\r
33 protected Vector superGroup = new Vector();
\r
34 protected char gapCharacter = '-';
\r
35 public AlignmentAnnotation [] annotations;
\r
37 public boolean featuresAdded = false;
\r
39 /** Make an alignment from an array of Sequences.
\r
43 public Alignment(SequenceI[] seqs) {
\r
44 sequences = new Vector();
\r
46 for (int i=0; i < seqs.length; i++)
\r
47 sequences.addElement(seqs[i]);
\r
52 public Vector getSequences() {
\r
56 public SequenceI getSequenceAt(int i) {
\r
57 if (i < sequences.size()) {
\r
58 return (SequenceI)sequences.elementAt(i);
\r
64 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
68 public void addSequence(SequenceI snew) {
\r
69 sequences.addElement(snew);
\r
72 public void addSequence(SequenceI[] seq) {
\r
73 for (int i=0; i < seq.length; i++) {
\r
74 addSequence(seq[i]);
\r
78 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
82 public void setSequenceAt(int i,SequenceI snew) {
\r
83 SequenceI oldseq = getSequenceAt(i);
\r
84 deleteSequence(oldseq);
\r
86 sequences.setElementAt(snew,i);
\r
89 public Vector getGroups() {
\r
93 /** Sorts the sequences by sequence group size - largest to smallest.
\r
96 public void sortGroups() {
\r
97 float[] arr = new float [groups.size()];
\r
98 Object[] s = new Object[groups.size()];
\r
100 for (int i=0; i < groups.size(); i++) {
\r
101 arr[i] = ((SequenceGroup)groups.elementAt(i)).sequences.size();
\r
102 s[i] = groups.elementAt(i);
\r
105 QuickSort.sort(arr,s);
\r
107 Vector newg = new Vector(groups.size());
\r
109 for (int i=groups.size()-1; i >= 0; i--) {
\r
110 newg.addElement(s[i]);
\r
116 /** Takes out columns consisting entirely of gaps (-,.," ")
\r
118 public void removeGaps()
\r
122 int iSize = getWidth();
\r
123 for (int i=0; i < iSize; i++)
\r
125 boolean delete = true;
\r
126 for (int j=0; j < getHeight(); j++)
\r
128 current = getSequenceAt(j);
\r
129 if (current.getLength() > i)
\r
131 /* MC Should move this to a method somewhere */
\r
132 if ( !jalview.util.Comparison.isGap(current.getCharAt(i)))
\r
140 deleteColumns(i,i);
\r
149 /** Returns an array of Sequences containing columns
\r
150 * start to end (inclusive) only.
\r
152 * @param start start column to fetch
\r
153 * @param end end column to fetch
\r
154 * @return Array of Sequences, ready to put into a new Alignment
\r
156 public SequenceI[] getColumns(int start, int end) {
\r
157 return getColumns(0,getHeight()-1,start,end);
\r
160 /** Removes a range of columns (start to end inclusive).
\r
162 * @param start Start column in the alignment
\r
163 * @param end End column in the alignment
\r
165 public void deleteColumns(int start, int end) {
\r
166 deleteColumns(0,getHeight()-1,start,end);
\r
169 public void deleteColumns(int seq1, int seq2, int start, int end) {
\r
171 for (int i=0; i <= (end-start); i++) {
\r
172 for (int j=seq1; j <= seq2; j++) {
\r
173 getSequenceAt(j).deleteCharAt(start);
\r
178 public void insertColumns(SequenceI[] seqs, int pos) {
\r
179 if (seqs.length == getHeight()) {
\r
180 for (int i=0; i < getHeight();i++) {
\r
181 String tmp = new String(getSequenceAt(i).getSequence());
\r
182 getSequenceAt(i).setSequence(tmp.substring(0,pos) + seqs[i].getSequence() + tmp.substring(pos));
\r
188 public SequenceI[] getColumns(int seq1, int seq2, int start, int end) {
\r
189 SequenceI[] seqs = new Sequence[(seq2-seq1)+1];
\r
190 for (int i=seq1; i<= seq2; i++ ) {
\r
191 seqs[i] = new Sequence(getSequenceAt(i).getName(),
\r
192 getSequenceAt(i).getSequence().substring(start,end),
\r
193 getSequenceAt(i).findPosition(start),
\r
194 getSequenceAt(i).findPosition(end));
\r
199 public void trimLeft(int i) {
\r
200 for (int j = 0;j< getHeight();j++) {
\r
202 SequenceI s = getSequenceAt(j);
\r
203 int newstart = s.findPosition(i);
\r
205 s.setStart(newstart);
\r
206 s.setSequence(s.getSequence().substring(i));
\r
211 public void trimRight(int i) {
\r
212 for (int j = 0;j< getHeight();j++) {
\r
213 SequenceI s = getSequenceAt(j);
\r
214 int newend = s.findPosition(i);
\r
217 s.setSequence(s.getSequence().substring(0,i+1));
\r
221 public void deleteSequence(SequenceI s)
\r
223 for (int i=0; i < getHeight(); i++)
\r
224 if (getSequenceAt(i) == s)
\r
228 public void deleteSequence(int i)
\r
230 sequences.removeElementAt(i);
\r
234 public Vector removeRedundancy(float threshold, Vector sel) {
\r
235 Vector del = new Vector();
\r
237 for (int i = 1; i < sel.size(); i++)
\r
239 for (int j = 0; j < i; j++)
\r
241 // Only do the comparison if either have not been deleted
\r
242 if (!del.contains( (SequenceI) sel.elementAt(i)) ||
\r
243 !del.contains( (SequenceI) sel.elementAt(j)))
\r
245 // use PID instead of Comparison (which is really not pleasant)
\r
246 float pid = Comparison.PID( (SequenceI) sel.elementAt(j),
\r
247 (SequenceI) sel.elementAt(i));
\r
249 if (pid >= threshold)
\r
251 // Delete the shortest one
\r
252 if ( ( (SequenceI) sel.elementAt(j)).getSequence().length() >
\r
253 ( (SequenceI) sel.elementAt(i)).getSequence().length())
\r
254 del.addElement(sel.elementAt(i));
\r
256 del.addElement(sel.elementAt(i));
\r
262 // Now delete the sequences
\r
263 for (int i=0; i < del.size(); i++)
\r
264 deleteSequence((SequenceI)del.elementAt(i));
\r
269 public void sortByPID(SequenceI s) {
\r
271 float scores[] = new float[getHeight()];
\r
272 SequenceI seqs[] = new SequenceI[getHeight()];
\r
274 for (int i = 0; i < getHeight(); i++) {
\r
275 scores[i] = Comparison.compare(getSequenceAt(i),s);
\r
276 seqs[i] = getSequenceAt(i);
\r
279 QuickSort.sort(scores,0,scores.length-1,seqs);
\r
283 if (getHeight()%2 == 0) {
\r
284 len = getHeight()/2;
\r
286 len = (getHeight()+1)/2;
\r
289 for (int i = 0; i < len; i++) {
\r
290 SequenceI tmp = seqs[i];
\r
291 sequences.setElementAt(seqs[getHeight()-i-1],i);
\r
292 sequences.setElementAt(tmp,getHeight()-i-1);
\r
296 public void sortByID() {
\r
297 String ids[] = new String[getHeight()];
\r
298 SequenceI seqs[] = new SequenceI[getHeight()];
\r
300 for (int i = 0; i < getHeight(); i++) {
\r
301 ids[i] = getSequenceAt(i).getName();
\r
302 seqs[i] = getSequenceAt(i);
\r
305 QuickSort.sort(ids,seqs);
\r
309 if (getHeight()%2 == 0) {
\r
310 len = getHeight()/2;
\r
312 len = (getHeight()+1)/2;
\r
313 System.out.println("DEBUG:Sort len is odd = " + len); // log.
\r
315 for (int i = 0; i < len; i++) {
\r
316 System.out.println("DEBUG:Swapping " + seqs[i].getName() + " and " + seqs[getHeight()-i-1].getName()); // log.
\r
317 SequenceI tmp = seqs[i];
\r
318 sequences.setElementAt(seqs[getHeight()-i-1],i);
\r
319 sequences.setElementAt(tmp,getHeight()-i-1);
\r
324 public SequenceGroup findGroup(int i) {
\r
325 return findGroup(getSequenceAt(i));
\r
329 public SequenceGroup findGroup(SequenceI s) {
\r
330 for (int i = 0; i < this.groups.size();i++)
\r
332 SequenceGroup sg = (SequenceGroup)groups.elementAt(i);
\r
333 if (sg.sequences.contains(s))
\r
340 public SequenceGroup [] findAllGroups(SequenceI s)
\r
343 Vector temp = new Vector();
\r
345 for (int i = 0; i < this.groups.size();i++)
\r
347 SequenceGroup sg = (SequenceGroup)groups.elementAt(i);
\r
349 if (sg.sequences.contains(s))
\r
350 temp.addElement(sg);
\r
353 SequenceGroup [] ret = new SequenceGroup[temp.size()];
\r
354 for(int i=0; i<temp.size(); i++)
\r
355 ret[i] = (SequenceGroup)temp.elementAt(i);
\r
361 public void addToGroup(SequenceGroup g, SequenceI s) {
\r
362 if (!(g.sequences.contains(s))) {
\r
363 g.sequences.addElement(s);
\r
367 public void removeFromGroup(SequenceGroup g,SequenceI s) {
\r
368 if (g != null && g.sequences != null) {
\r
369 if (g.sequences.contains(s)) {
\r
370 g.sequences.removeElement(s);
\r
371 if (g.sequences.size() == 0) {
\r
372 groups.removeElement(g);
\r
378 public void addSuperGroup(SuperGroup sg)
\r
380 superGroup.addElement(sg);
\r
383 public void removeSuperGroup(SuperGroup sg)
\r
385 superGroup.removeElement(sg);
\r
388 public SuperGroup getSuperGroup(SequenceGroup sg)
\r
390 for (int i = 0; i < this.superGroup.size(); i++)
\r
392 SuperGroup temp = (SuperGroup) superGroup.elementAt(i);
\r
393 if (temp.sequenceGroups.contains(sg))
\r
400 public void addGroup(SequenceGroup sg) {
\r
401 if(!groups.contains(sg))
\r
402 groups.addElement(sg);
\r
405 public void deleteAllGroups()
\r
407 groups.removeAllElements();
\r
408 superGroup.removeAllElements();
\r
410 while (i < sequences.size()) {
\r
411 SequenceI s = getSequenceAt(i);
\r
412 s.setColor(java.awt.Color.white);
\r
420 public void deleteGroup(SequenceGroup g) {
\r
421 if (groups.contains(g)) {
\r
422 groups.removeElement(g);
\r
427 public SequenceI findName(String name) {
\r
429 while (i < sequences.size()) {
\r
430 SequenceI s = getSequenceAt(i);
\r
431 if (s.getName().equals(name))
\r
440 public SequenceI findbyDisplayId(String name) {
\r
442 while (i < sequences.size()) {
\r
443 SequenceI s = getSequenceAt(i);
\r
444 if (s.getDisplayId().equals(name))
\r
453 public int findIndex(SequenceI s)
\r
456 while (i < sequences.size())
\r
458 if (s == getSequenceAt(i))
\r
466 public int getHeight() {
\r
467 return sequences.size();
\r
471 public int getWidth()
\r
473 int maxLength = -1;
\r
474 for (int i = 0; i < sequences.size(); i++)
\r
476 if (getSequenceAt(i).getLength() > maxLength)
\r
477 maxLength = getSequenceAt(i).getLength();
\r
484 public int getMaxIdLength() {
\r
488 while (i < sequences.size()) {
\r
489 SequenceI seq = getSequenceAt(i);
\r
490 String tmp = seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd();
\r
492 if (tmp.length() > max) {
\r
493 max = tmp.length();
\r
501 public void setGapCharacter(char gc)
\r
504 for (int i=0; i < sequences.size(); i++)
\r
506 Sequence seq = (Sequence)sequences.elementAt(i);
\r
507 seq.sequence = seq.sequence.replace('.', gc);
\r
508 seq.sequence = seq.sequence.replace('-', gc);
\r
512 public char getGapCharacter() {
\r
513 return gapCharacter;
\r
516 public Vector getAAFrequency()
\r
518 return AAFrequency.calculate(sequences, 0, getWidth());
\r
521 public boolean isAligned()
\r
523 int width = getWidth();
\r
524 for (int i = 0; i < sequences.size(); i++)
\r
525 if (getSequenceAt(i).getLength() != width)
\r
531 public void deleteAnnotation(AlignmentAnnotation aa)
\r
534 if(annotations!=null)
\r
535 aSize = annotations.length;
\r
537 AlignmentAnnotation [] temp = new AlignmentAnnotation [aSize-1];
\r
540 for (int i = 0; i < aSize; i++)
\r
542 if(annotations[i]==aa)
\r
546 temp[tIndex] = annotations[i];
\r
550 annotations = temp;
\r
554 public void addAnnotation(AlignmentAnnotation aa)
\r
557 if(annotations!=null)
\r
558 aSize = annotations.length+1;
\r
560 AlignmentAnnotation [] temp = new AlignmentAnnotation [aSize];
\r
563 for (i = 0; i < aSize-1; i++)
\r
564 temp[i] = annotations[i];
\r
568 annotations = temp;
\r
570 public AlignmentAnnotation[] getAlignmentAnnotation()
\r
572 return annotations;
\r