2 * Jalview - A Sequence Alignment Editor and Viewer
\r
3 * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
\r
5 * This program is free software; you can redistribute it and/or
\r
6 * modify it under the terms of the GNU General Public License
\r
7 * as published by the Free Software Foundation; either version 2
\r
8 * of the License, or (at your option) any later version.
\r
10 * This program is distributed in the hope that it will be useful,
\r
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
13 * GNU General Public License for more details.
\r
15 * You should have received a copy of the GNU General Public License
\r
16 * along with this program; if not, write to the Free Software
\r
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
\r
19 package jalview.datamodel;
\r
21 import jalview.analysis.*;
\r
23 import jalview.util.*;
\r
28 /** Data structure to hold and manipulate a multiple sequence alignment
\r
30 public class Alignment implements AlignmentI {
\r
31 protected Vector sequences;
\r
32 protected Vector groups = new Vector();
\r
33 protected Vector superGroup = new Vector();
\r
34 protected char gapCharacter = '-';
\r
35 public AlignmentAnnotation[] annotations;
\r
36 public boolean featuresAdded = false;
\r
38 /** Make an alignment from an array of Sequences.
\r
42 public Alignment(SequenceI[] seqs) {
\r
43 sequences = new Vector();
\r
45 for (int i = 0; i < seqs.length; i++)
\r
46 sequences.addElement(seqs[i]);
\r
51 public Vector getSequences() {
\r
55 public SequenceI getSequenceAt(int i) {
\r
56 if (i < sequences.size()) {
\r
57 return (SequenceI) sequences.elementAt(i);
\r
63 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
67 public void addSequence(SequenceI snew) {
\r
68 sequences.addElement(snew);
\r
71 public void addSequence(SequenceI[] seq) {
\r
72 for (int i = 0; i < seq.length; i++) {
\r
73 addSequence(seq[i]);
\r
77 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
81 public void setSequenceAt(int i, SequenceI snew) {
\r
82 SequenceI oldseq = getSequenceAt(i);
\r
83 deleteSequence(oldseq);
\r
85 sequences.setElementAt(snew, i);
\r
88 public Vector getGroups() {
\r
92 /** Sorts the sequences by sequence group size - largest to smallest.
\r
95 public void sortGroups() {
\r
96 float[] arr = new float[groups.size()];
\r
97 Object[] s = new Object[groups.size()];
\r
99 for (int i = 0; i < groups.size(); i++) {
\r
100 arr[i] = ((SequenceGroup) groups.elementAt(i)).sequences.size();
\r
101 s[i] = groups.elementAt(i);
\r
104 QuickSort.sort(arr, s);
\r
106 Vector newg = new Vector(groups.size());
\r
108 for (int i = groups.size() - 1; i >= 0; i--) {
\r
109 newg.addElement(s[i]);
\r
115 /** Takes out columns consisting entirely of gaps (-,.," ")
\r
117 public void removeGaps() {
\r
119 int iSize = getWidth();
\r
121 for (int i = 0; i < iSize; i++) {
\r
122 boolean delete = true;
\r
124 for (int j = 0; j < getHeight(); j++) {
\r
125 current = getSequenceAt(j);
\r
127 if (current.getLength() > i) {
\r
128 /* MC Should move this to a method somewhere */
\r
129 if (!jalview.util.Comparison.isGap(current.getCharAt(i))) {
\r
136 deleteColumns(i, i);
\r
143 /** Returns an array of Sequences containing columns
\r
144 * start to end (inclusive) only.
\r
146 * @param start start column to fetch
\r
147 * @param end end column to fetch
\r
148 * @return Array of Sequences, ready to put into a new Alignment
\r
150 public SequenceI[] getColumns(int start, int end) {
\r
151 return getColumns(0, getHeight() - 1, start, end);
\r
154 /** Removes a range of columns (start to end inclusive).
\r
156 * @param start Start column in the alignment
\r
157 * @param end End column in the alignment
\r
159 public void deleteColumns(int start, int end) {
\r
160 deleteColumns(0, getHeight() - 1, start, end);
\r
163 public void deleteColumns(int seq1, int seq2, int start, int end) {
\r
164 for (int i = 0; i <= (end - start); i++) {
\r
165 for (int j = seq1; j <= seq2; j++) {
\r
166 getSequenceAt(j).deleteCharAt(start);
\r
171 public void insertColumns(SequenceI[] seqs, int pos) {
\r
172 if (seqs.length == getHeight()) {
\r
173 for (int i = 0; i < getHeight(); i++) {
\r
174 String tmp = new String(getSequenceAt(i).getSequence());
\r
175 getSequenceAt(i).setSequence(tmp.substring(0, pos) +
\r
176 seqs[i].getSequence() + tmp.substring(pos));
\r
181 public SequenceI[] getColumns(int seq1, int seq2, int start, int end) {
\r
182 SequenceI[] seqs = new Sequence[(seq2 - seq1) + 1];
\r
184 for (int i = seq1; i <= seq2; i++) {
\r
185 seqs[i] = new Sequence(getSequenceAt(i).getName(),
\r
186 getSequenceAt(i).getSequence().substring(start, end),
\r
187 getSequenceAt(i).findPosition(start),
\r
188 getSequenceAt(i).findPosition(end));
\r
194 public void trimLeft(int i) {
\r
195 for (int j = 0; j < getHeight(); j++) {
\r
196 SequenceI s = getSequenceAt(j);
\r
197 int newstart = s.findPosition(i);
\r
199 s.setStart(newstart);
\r
200 s.setSequence(s.getSequence().substring(i));
\r
204 public void trimRight(int i) {
\r
205 for (int j = 0; j < getHeight(); j++) {
\r
206 SequenceI s = getSequenceAt(j);
\r
207 int newend = s.findPosition(i);
\r
210 s.setSequence(s.getSequence().substring(0, i + 1));
\r
214 public void deleteSequence(SequenceI s) {
\r
215 for (int i = 0; i < getHeight(); i++)
\r
216 if (getSequenceAt(i) == s) {
\r
221 public void deleteSequence(int i) {
\r
222 sequences.removeElementAt(i);
\r
225 public Vector removeRedundancy(float threshold, Vector sel) {
\r
226 Vector del = new Vector();
\r
228 for (int i = 1; i < sel.size(); i++) {
\r
229 for (int j = 0; j < i; j++) {
\r
230 // Only do the comparison if either have not been deleted
\r
231 if (!del.contains((SequenceI) sel.elementAt(i)) ||
\r
232 !del.contains((SequenceI) sel.elementAt(j))) {
\r
233 // use PID instead of Comparison (which is really not pleasant)
\r
234 float pid = Comparison.PID((SequenceI) sel.elementAt(j),
\r
235 (SequenceI) sel.elementAt(i));
\r
237 if (pid >= threshold) {
\r
238 // Delete the shortest one
\r
239 if (((SequenceI) sel.elementAt(j)).getSequence().length() > ((SequenceI) sel
\r
241 i)).getSequence().length()) {
\r
242 del.addElement(sel.elementAt(i));
\r
244 del.addElement(sel.elementAt(i));
\r
251 // Now delete the sequences
\r
252 for (int i = 0; i < del.size(); i++)
\r
253 deleteSequence((SequenceI) del.elementAt(i));
\r
258 public void sortByPID(SequenceI s) {
\r
259 float[] scores = new float[getHeight()];
\r
260 SequenceI[] seqs = new SequenceI[getHeight()];
\r
262 for (int i = 0; i < getHeight(); i++) {
\r
263 scores[i] = Comparison.compare(getSequenceAt(i), s);
\r
264 seqs[i] = getSequenceAt(i);
\r
267 QuickSort.sort(scores, 0, scores.length - 1, seqs);
\r
271 if ((getHeight() % 2) == 0) {
\r
272 len = getHeight() / 2;
\r
274 len = (getHeight() + 1) / 2;
\r
277 for (int i = 0; i < len; i++) {
\r
278 SequenceI tmp = seqs[i];
\r
279 sequences.setElementAt(seqs[getHeight() - i - 1], i);
\r
280 sequences.setElementAt(tmp, getHeight() - i - 1);
\r
284 public void sortByID() {
\r
285 String[] ids = new String[getHeight()];
\r
286 SequenceI[] seqs = new SequenceI[getHeight()];
\r
288 for (int i = 0; i < getHeight(); i++) {
\r
289 ids[i] = getSequenceAt(i).getName();
\r
290 seqs[i] = getSequenceAt(i);
\r
293 QuickSort.sort(ids, seqs);
\r
297 if ((getHeight() % 2) == 0) {
\r
298 len = getHeight() / 2;
\r
300 len = (getHeight() + 1) / 2;
\r
301 System.out.println("DEBUG:Sort len is odd = " + len); // log.
\r
304 for (int i = 0; i < len; i++) {
\r
305 System.out.println("DEBUG:Swapping " + seqs[i].getName() + " and " +
\r
306 seqs[getHeight() - i - 1].getName()); // log.
\r
308 SequenceI tmp = seqs[i];
\r
309 sequences.setElementAt(seqs[getHeight() - i - 1], i);
\r
310 sequences.setElementAt(tmp, getHeight() - i - 1);
\r
315 public SequenceGroup findGroup(int i) {
\r
316 return findGroup(getSequenceAt(i));
\r
320 public SequenceGroup findGroup(SequenceI s) {
\r
321 for (int i = 0; i < this.groups.size(); i++) {
\r
322 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
\r
324 if (sg.sequences.contains(s)) {
\r
332 public SequenceGroup[] findAllGroups(SequenceI s) {
\r
333 Vector temp = new Vector();
\r
335 for (int i = 0; i < this.groups.size(); i++) {
\r
336 SequenceGroup sg = (SequenceGroup) groups.elementAt(i);
\r
338 if (sg.sequences.contains(s)) {
\r
339 temp.addElement(sg);
\r
343 SequenceGroup[] ret = new SequenceGroup[temp.size()];
\r
345 for (int i = 0; i < temp.size(); i++)
\r
346 ret[i] = (SequenceGroup) temp.elementAt(i);
\r
352 public void addToGroup(SequenceGroup g, SequenceI s) {
\r
353 if (!(g.sequences.contains(s))) {
\r
354 g.sequences.addElement(s);
\r
359 public void removeFromGroup(SequenceGroup g, SequenceI s) {
\r
360 if ((g != null) && (g.sequences != null)) {
\r
361 if (g.sequences.contains(s)) {
\r
362 g.sequences.removeElement(s);
\r
364 if (g.sequences.size() == 0) {
\r
365 groups.removeElement(g);
\r
371 public void addSuperGroup(SuperGroup sg) {
\r
372 superGroup.addElement(sg);
\r
375 public void removeSuperGroup(SuperGroup sg) {
\r
376 superGroup.removeElement(sg);
\r
379 public SuperGroup getSuperGroup(SequenceGroup sg) {
\r
380 for (int i = 0; i < this.superGroup.size(); i++) {
\r
381 SuperGroup temp = (SuperGroup) superGroup.elementAt(i);
\r
383 if (temp.sequenceGroups.contains(sg)) {
\r
392 public void addGroup(SequenceGroup sg) {
\r
393 if (!groups.contains(sg)) {
\r
394 groups.addElement(sg);
\r
398 public void deleteAllGroups() {
\r
399 groups.removeAllElements();
\r
400 superGroup.removeAllElements();
\r
404 while (i < sequences.size()) {
\r
405 SequenceI s = getSequenceAt(i);
\r
406 s.setColor(java.awt.Color.white);
\r
412 public void deleteGroup(SequenceGroup g) {
\r
413 if (groups.contains(g)) {
\r
414 groups.removeElement(g);
\r
419 public SequenceI findName(String name) {
\r
422 while (i < sequences.size()) {
\r
423 SequenceI s = getSequenceAt(i);
\r
425 if (s.getName().equals(name)) {
\r
436 public SequenceI findbyDisplayId(String name) {
\r
439 while (i < sequences.size()) {
\r
440 SequenceI s = getSequenceAt(i);
\r
442 if (s.getDisplayId().equals(name)) {
\r
453 public int findIndex(SequenceI s) {
\r
456 while (i < sequences.size()) {
\r
457 if (s == getSequenceAt(i)) {
\r
467 public int getHeight() {
\r
468 return sequences.size();
\r
471 public int getWidth() {
\r
472 int maxLength = -1;
\r
474 for (int i = 0; i < sequences.size(); i++) {
\r
475 if (getSequenceAt(i).getLength() > maxLength) {
\r
476 maxLength = getSequenceAt(i).getLength();
\r
483 public int getMaxIdLength() {
\r
487 while (i < sequences.size()) {
\r
488 SequenceI seq = getSequenceAt(i);
\r
489 String tmp = seq.getName() + "/" + seq.getStart() + "-" +
\r
492 if (tmp.length() > max) {
\r
493 max = tmp.length();
\r
502 public void setGapCharacter(char gc) {
\r
505 for (int i = 0; i < sequences.size(); i++) {
\r
506 Sequence seq = (Sequence) sequences.elementAt(i);
\r
507 seq.sequence = seq.sequence.replace('.', gc);
\r
508 seq.sequence = seq.sequence.replace('-', gc);
\r
509 seq.sequence = seq.sequence.replace(' ', gc);
\r
513 public char getGapCharacter() {
\r
514 return gapCharacter;
\r
517 public Vector getAAFrequency() {
\r
518 return AAFrequency.calculate(sequences, 0, getWidth());
\r
521 public boolean isAligned() {
\r
522 int width = getWidth();
\r
524 for (int i = 0; i < sequences.size(); i++)
\r
525 if (getSequenceAt(i).getLength() != width) {
\r
532 public void deleteAnnotation(AlignmentAnnotation aa) {
\r
535 if (annotations != null) {
\r
536 aSize = annotations.length;
\r
539 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize - 1];
\r
543 for (int i = 0; i < aSize; i++) {
\r
544 if (annotations[i] == aa) {
\r
548 temp[tIndex] = annotations[i];
\r
552 annotations = temp;
\r
555 public void addAnnotation(AlignmentAnnotation aa) {
\r
558 if (annotations != null) {
\r
559 aSize = annotations.length + 1;
\r
562 AlignmentAnnotation[] temp = new AlignmentAnnotation[aSize];
\r
566 for (i = 0; i < (aSize - 1); i++)
\r
567 temp[i] = annotations[i];
\r
572 annotations = temp;
\r
575 public AlignmentAnnotation[] getAlignmentAnnotation() {
\r
576 return annotations;
\r