1 package jalview.datamodel;
\r
3 import jalview.jbgui.*;
\r
4 import jalview.schemes.*;
\r
5 import jalview.analysis.*;
\r
6 import jalview.util.*;
\r
/** Data structure to hold and manipulate a multiple sequence alignment. */
\r
11 public class Alignment implements AlignmentI
\r
14 protected Vector sequences;
\r
15 protected Vector groups = new Vector();
\r
16 public Hashtable[] cons;
\r
17 protected String gapCharacter = ".";
\r
19 /** Make an alignment from an array of Sequences.
\r
23 public Alignment(SequenceI[] seqs) {
\r
24 sequences = new Vector();
\r
26 for (int i=0; i < seqs.length; i++) {
\r
27 sequences.addElement(seqs[i]);
\r
30 groups.addElement(new SequenceGroup());
\r
34 while (i < seqs.length) {
\r
35 addToGroup((SequenceGroup)groups.elementAt(0),seqs[i]);
\r
42 public Vector getSequences() {
\r
46 public SequenceI getSequenceAt(int i) {
\r
47 if (i < sequences.size()) {
\r
48 return (SequenceI)sequences.elementAt(i);
\r
54 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
55 * Should put the new sequence in a sequence group!!!
\r
59 public void addSequence(SequenceI snew) {
\r
60 sequences.addElement(snew);
\r
62 ((SequenceGroup)groups.lastElement()).addSequence(snew);
\r
65 public void addSequence(SequenceI[] seq) {
\r
66 for (int i=0; i < seq.length; i++) {
\r
67 addSequence(seq[i]);
\r
71 /** Adds a sequence to the alignment. Recalculates maxLength and size.
\r
72 * Should put the new sequence in a sequence group!!!
\r
76 public void setSequenceAt(int i,SequenceI snew) {
\r
77 SequenceI oldseq = getSequenceAt(i);
\r
78 deleteSequence(oldseq);
\r
80 sequences.setElementAt(snew,i);
\r
82 ((SequenceGroup)groups.lastElement()).addSequence(snew);
\r
85 public Vector getGroups() {
\r
89 /** Sorts the sequences by sequence group size - largest to smallest.
\r
92 public void sortGroups() {
\r
93 float[] arr = new float [groups.size()];
\r
94 Object[] s = new Object[groups.size()];
\r
96 for (int i=0; i < groups.size(); i++) {
\r
97 arr[i] = ((SequenceGroup)groups.elementAt(i)).sequences.size();
\r
98 s[i] = groups.elementAt(i);
\r
101 QuickSort.sort(arr,s);
\r
103 Vector newg = new Vector(groups.size());
\r
105 for (int i=groups.size()-1; i >= 0; i--) {
\r
106 newg.addElement(s[i]);
\r
112 /** Takes out columns consisting entirely of gaps (-,.," ")
\r
114 public void removeGaps()
\r
118 int iSize = getWidth();
\r
119 for (int i=0; i < iSize; i++)
\r
121 boolean delete = true;
\r
122 for (int j=0; j < getHeight(); j++)
\r
124 current = getSequenceAt(j);
\r
125 if (current.getLength() > i)
\r
127 /* MC Should move this to a method somewhere */
\r
128 if (current.getCharAt(i)!='-' && current.getCharAt(i)!='.' && current.getCharAt(i)!=' ')
\r
136 deleteColumns(i,i);
\r
145 /** Returns an array of Sequences containing columns
\r
146 * start to end (inclusive) only.
\r
148 * @param start start column to fetch
\r
149 * @param end end column to fetch
\r
150 * @return Array of Sequences, ready to put into a new Alignment
\r
152 public SequenceI[] getColumns(int start, int end) {
\r
153 return getColumns(0,getHeight()-1,start,end);
\r
156 /** Removes a range of columns (start to end inclusive).
\r
158 * @param start Start column in the alignment
\r
159 * @param end End column in the alignment
\r
161 public void deleteColumns(int start, int end) {
\r
162 deleteColumns(0,getHeight()-1,start,end);
\r
165 public void deleteColumns(int seq1, int seq2, int start, int end) {
\r
167 for (int i=0; i <= (end-start); i++) {
\r
168 for (int j=seq1; j <= seq2; j++) {
\r
169 getSequenceAt(j).deleteCharAt(start);
\r
174 public void insertColumns(SequenceI[] seqs, int pos) {
\r
175 if (seqs.length == getHeight()) {
\r
176 for (int i=0; i < getHeight();i++) {
\r
177 String tmp = new String(getSequenceAt(i).getSequence());
\r
178 getSequenceAt(i).setSequence(tmp.substring(0,pos) + seqs[i].getSequence() + tmp.substring(pos));
\r
184 public SequenceI[] getColumns(int seq1, int seq2, int start, int end) {
\r
185 SequenceI[] seqs = new Sequence[(seq2-seq1)+1];
\r
186 for (int i=seq1; i<= seq2; i++ ) {
\r
187 seqs[i] = new Sequence(getSequenceAt(i).getName(),
\r
188 getSequenceAt(i).getSequence().substring(start,end),
\r
189 getSequenceAt(i).findPosition(start),
\r
190 getSequenceAt(i).findPosition(end));
\r
195 public void trimLeft(int i) {
\r
196 for (int j = 0;j< getHeight();j++) {
\r
198 SequenceI s = getSequenceAt(j);
\r
199 int newstart = s.findPosition(i);
\r
201 s.setStart(newstart);
\r
202 s.setSequence(s.getSequence().substring(i));
\r
207 public void trimRight(int i) {
\r
208 for (int j = 0;j< getHeight();j++) {
\r
209 SequenceI s = getSequenceAt(j);
\r
210 int newend = s.findPosition(i);
\r
213 s.setSequence(s.getSequence().substring(0,i+1));
\r
217 public void deleteSequence(SequenceI s)
\r
219 for (int i=0; i < getHeight(); i++)
\r
220 if (getSequenceAt(i) == s)
\r
224 public void deleteSequence(int i)
\r
226 sequences.removeElementAt(i);
\r
230 public Vector removeRedundancy(float threshold, Vector sel) {
\r
231 Vector del = new Vector();
\r
233 for (int i=1; i < sel.size(); i++) {
\r
234 for (int j = 0; j < i; j++) {
\r
235 // Only do the comparison if either have not been deleted
\r
236 if (!del.contains((SequenceI)sel.elementAt(i)) ||
\r
237 !del.contains((SequenceI)sel.elementAt(j))) {
\r
239 float pid = Comparison.compare((SequenceI)sel.elementAt(j),
\r
240 (SequenceI)sel.elementAt(i));
\r
242 if (pid >= threshold) {
\r
243 // Delete the shortest one
\r
244 if (((SequenceI)sel.elementAt(j)).getSequence().length() >
\r
245 ((SequenceI)sel.elementAt(i)).getSequence().length()) {
\r
246 del.addElement(sel.elementAt(i));
\r
247 System.out.println("Deleting sequence " + ((SequenceI)sel.elementAt(i)).getName());
\r
249 del.addElement(sel.elementAt(i));
\r
250 System.out.println("Deleting sequence " + ((SequenceI)sel.elementAt(i)).getName());
\r
257 // Now delete the sequences
\r
258 for (int i=0; i < del.size(); i++) {
\r
259 System.out.println("Deleting sequence " + ((SequenceI)del.elementAt(i)).getName());
\r
260 deleteSequence((SequenceI)del.elementAt(i));
\r
266 public void sortByPID(SequenceI s) {
\r
268 float scores[] = new float[getHeight()];
\r
269 SequenceI seqs[] = new SequenceI[getHeight()];
\r
271 for (int i = 0; i < getHeight(); i++) {
\r
272 scores[i] = Comparison.compare(getSequenceAt(i),s);
\r
273 seqs[i] = getSequenceAt(i);
\r
276 QuickSort.sort(scores,0,scores.length-1,seqs);
\r
280 if (getHeight()%2 == 0) {
\r
281 len = getHeight()/2;
\r
283 len = (getHeight()+1)/2;
\r
286 for (int i = 0; i < len; i++) {
\r
287 SequenceI tmp = seqs[i];
\r
288 sequences.setElementAt(seqs[getHeight()-i-1],i);
\r
289 sequences.setElementAt(tmp,getHeight()-i-1);
\r
293 public void sortByID() {
\r
294 String ids[] = new String[getHeight()];
\r
295 SequenceI seqs[] = new SequenceI[getHeight()];
\r
297 for (int i = 0; i < getHeight(); i++) {
\r
298 ids[i] = getSequenceAt(i).getName();
\r
299 seqs[i] = getSequenceAt(i);
\r
302 QuickSort.sort(ids,seqs);
\r
306 if (getHeight()%2 == 0) {
\r
307 len = getHeight()/2;
\r
309 len = (getHeight()+1)/2;
\r
310 System.out.println("Sort len is odd = " + len);
\r
312 for (int i = 0; i < len; i++) {
\r
313 System.out.println("Swapping " + seqs[i].getName() + " and " + seqs[getHeight()-i-1].getName());
\r
314 SequenceI tmp = seqs[i];
\r
315 sequences.setElementAt(seqs[getHeight()-i-1],i);
\r
316 sequences.setElementAt(tmp,getHeight()-i-1);
\r
321 public SequenceGroup findGroup(int i) {
\r
322 return findGroup(getSequenceAt(i));
\r
326 public SequenceGroup findGroup(SequenceI s) {
\r
327 for (int i = 0; i < this.groups.size();i++) {
\r
328 SequenceGroup sg = (SequenceGroup)groups.elementAt(i);
\r
329 if (sg.sequences.contains(s)) {
\r
337 public void addToGroup(SequenceGroup g, SequenceI s) {
\r
338 if (!(g.sequences.contains(s))) {
\r
339 g.sequences.addElement(s);
\r
343 public void removeFromGroup(SequenceGroup g,SequenceI s) {
\r
344 if (g != null && g.sequences != null) {
\r
345 if (g.sequences.contains(s)) {
\r
346 g.sequences.removeElement(s);
\r
347 if (g.sequences.size() == 0) {
\r
348 groups.removeElement(g);
\r
355 public void addGroup(SequenceGroup sg) {
\r
356 groups.addElement(sg);
\r
360 public SequenceGroup addGroup() {
\r
361 SequenceGroup sg = new SequenceGroup();
\r
362 groups.addElement(sg);
\r
367 public void deleteGroup(SequenceGroup g) {
\r
368 if (groups.contains(g)) {
\r
369 groups.removeElement(g);
\r
374 public SequenceI findName(String name) {
\r
376 while (i < sequences.size()) {
\r
377 SequenceI s = getSequenceAt(i);
\r
378 if (s.getName().equals(name)) {
\r
387 public int findIndex(SequenceI s) {
\r
389 while (i < sequences.size()) {
\r
390 if (s == getSequenceAt(i)) {
\r
398 public int getHeight() {
\r
399 return sequences.size();
\r
403 public int getWidth()
\r
405 int maxLength = -1;
\r
406 for (int i = 0; i < sequences.size(); i++)
\r
408 if (getSequenceAt(i).getLength() > maxLength)
\r
409 maxLength = getSequenceAt(i).getLength();
\r
416 public int getMaxIdLength() {
\r
420 while (i < sequences.size()) {
\r
421 SequenceI seq = getSequenceAt(i);
\r
422 String tmp = seq.getName() + "/" + seq.getStart() + "-" + seq.getEnd();
\r
424 if (tmp.length() > max) {
\r
425 max = tmp.length();
\r
433 public void setGapCharacter(String gc) {
\r
437 public String getGapCharacter() {
\r
438 return gapCharacter;
\r
441 public Vector getAAFrequency()
\r
443 return AAFrequency.calculate(sequences, 0, getWidth());
\r