2 * Jalview - A Sequence Alignment Editor and Viewer
3 * Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19 package jalview.analysis;
21 import jalview.datamodel.*;
27 * Calculates conservation values for a given set of sequences
32 public class Conservation
34 SequenceI [] sequences;
37 Vector seqNums; // vector of int vectors where first is sequence checksum
38 int maxLength = 0; // used by quality calcs
39 boolean seqNumsChanged = false; // updated after any change via calcSeqNum;
42 /** Stores calculated quality values */
43 public Vector quality;
45 /** Stores maximum and minimum values of quality values */
46 public Double[] qualityRange = new Double[2];
47 String consString = "";
48 Sequence consSequence;
55 * Creates a new Conservation object.
57 * @param name Name of conservation
58 * @param propHash DOCUMENT ME!
59 * @param threshold to count the residues in residueHash(). commonly used value is 3
60 * @param sequences sequences to be used in calculation
61 * @param start start residue position
62 * @param end end residue position
64 public Conservation(String name, Hashtable propHash, int threshold,
65 Vector sequences, int start, int end)
69 this.propHash = propHash;
70 this.threshold = threshold;
74 maxLength=end-start+1; // default width includes bounds of calculation
76 int s, sSize = sequences.size();
77 SequenceI[] sarray = new SequenceI[sSize];
78 this.sequences = sarray;
80 for (s = 0; s < sSize; s++)
82 sarray[s] = (SequenceI) sequences.elementAt(s);
83 if(sarray[s].getLength()>maxLength)
84 maxLength = sarray[s].getLength();
92 * @param i DOCUMENT ME!
94 private void calcSeqNum(int i)
96 String sq = null; // for dumb jbuilder not-inited exception warning
99 int sSize = sequences.length;
101 if ((i > -1) && (i < sSize))
103 sq = sequences[i].getSequence();
105 if (seqNums.size() <= i)
107 seqNums.addElement(new int[sq.length() + 1]);
110 if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0])
114 seqNumsChanged = true;
122 sqnum = new int[len + 1]; // better to always make a new array - sequence can change its length
123 sqnum[0] = sq.hashCode();
125 for (j = 1; j <= len; j++)
127 sqnum[j] = jalview.schemes.ResidueProperties.aaIndex[sq.charAt(j-1)];
131 seqNums.setElementAt(sqnum, i);
134 System.out.println("SEQUENCE HAS BEEN DELETED!!!");
138 // JBPNote INFO level debug
140 "ERROR: calcSeqNum called with out of range sequence index for Alignment\n");
145 * Calculates the conservation values for given set of sequences
147 public void calculate()
149 Hashtable resultHash, ht;
150 int thresh, j, jSize = sequences.length;
151 int[] values; // Replaces residueHash
152 String type, res=null;
154 Enumeration enumeration2;
156 total = new Hashtable[maxLength];
158 for (int i = start; i <= end; i++)
160 values = new int[132];
162 for (j = 0; j < jSize; j++)
164 if (sequences[j].getLength() > i)
166 c = sequences[j].getCharAt(i);
168 // No need to check if its a '-'
169 if (c == '.' || c == ' ')
172 if ('a' <= c && c <= 'z')
174 c -= (32);// 32 = 'a' - 'A'
185 //What is the count threshold to count the residues in residueHash()
186 thresh = (threshold * (jSize)) / 100;
188 //loop over all the found residues
189 resultHash = new Hashtable();
190 for (int v = '-'; v < 'Z'; v++)
193 if (values[v] > thresh)
195 res = String.valueOf( (char) v);
197 //Now loop over the properties
198 enumeration2 = propHash.keys();
200 while (enumeration2.hasMoreElements())
202 type = (String) enumeration2.nextElement();
203 ht = (Hashtable) propHash.get(type);
205 //Have we ticked this before?
206 if (!resultHash.containsKey(type))
208 if (ht.containsKey(res))
210 resultHash.put(type, ht.get(res));
214 resultHash.put(type, ht.get("-"));
217 else if (((Integer) resultHash.get(type)).equals(
218 (Integer) ht.get(res)) == false)
220 resultHash.put(type, new Integer(-1));
226 total[i-start] = resultHash;
233 * returns gap count in int[0], and conserved residue count in int[1]
235 public int[] countConsNGaps(int j)
240 int[] r = new int[2];
242 int i, iSize = sequences.length;
245 for (i = 0; i < iSize; i++)
247 if (j >= sequences[i].getLength())
253 c = sequences[i].getCharAt(j); // gaps do not have upper/lower case
255 if (jalview.util.Comparison.isGap((c)))
275 r[0] = (nres == cons) ? 1 : 0;
282 * Calculates the conservation sequence
284 * @param consflag if true, poitiveve conservation; false calculates negative conservation
285 * @param percentageGaps commonly used value is 25
287 public void verdict(boolean consflag, float percentageGaps)
289 StringBuffer consString = new StringBuffer();
295 Hashtable resultHash ;
296 Enumeration enumeration;
299 for (int i = start; i <= end; i++)
301 gapcons = countConsNGaps(i);
302 totGaps = gapcons[1];
303 pgaps = ((float) totGaps * 100) / (float) sequences.length;
305 if (percentageGaps > pgaps)
307 resultHash = total[i - start];
309 //Now find the verdict
311 enumeration = resultHash.keys();
313 while (enumeration.hasMoreElements())
315 type = (String) enumeration.nextElement();
316 result = (Integer) resultHash.get(type);
318 //Do we want to count +ve conservation or +ve and -ve cons.?
321 if (result.intValue() == 1)
328 if (result.intValue() != -1)
337 consString.append(count); // Conserved props!=Identity
341 consString.append((gapcons[0] == 1) ? "*" : "+");
346 consString.append("-");
350 consSequence = new Sequence(name, consString.toString(), start, end);
356 * @return Conservation sequence
358 public Sequence getConsSequence()
363 // From Alignment.java in jalview118
364 public void findQuality()
366 findQuality(0, maxLength - 1);
372 private void percentIdentity2()
374 seqNums = new Vector();
376 int i = 0, iSize = sequences.length;
377 //Do we need to calculate this again?
378 for (i = 0; i < iSize; i++)
384 if ((cons2 == null) || seqNumsChanged)
386 cons2 = new int[maxLength][24];
388 // Initialize the array
389 for (int j = 0; j < 24; j++)
391 for (i = 0; i < maxLength; i++)
400 while (j < sequences.length)
402 sqnum = (int[]) seqNums.elementAt(j);
404 for (i = 1; i < sqnum.length; i++)
406 cons2[i - 1][sqnum[i]]++;
409 for (i = sqnum.length - 1; i < maxLength; i++)
411 cons2[i][23]++; // gap count
419 /* for (int i=start; i <= end; i++) {
424 for (int j=0;j<24;j++) {
425 if (cons2[i][j] > max) {
437 * Calculates the quality of the set of sequences
439 * @param start Start residue
440 * @param end End residue
442 public void findQuality(int start, int end)
444 quality = new Vector();
447 int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62();
449 //Loop over columns // JBPNote Profiling info
450 //long ts = System.currentTimeMillis();
451 //long te = System.currentTimeMillis();
454 int size = seqNums.size();
455 int[] lengths = new int[size];
456 double tot, bigtot, sr, tmp;
458 int l, j, i, ii, i2, k, seqNum;
460 for (l = 0; l < size; l++)
461 lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1;
463 for (j = start; j <= end; j++)
467 // First Xr = depends on column only
470 for (ii = 0; ii < 24; ii++)
474 for (i2 = 0; i2 < 24; i2++)
476 x[ii] += ( ( (double) cons2[j][i2] * BLOSUM62[ii][i2]) +
483 // Now calculate D for each position and sum
484 for (k = 0; k < size; k++)
488 seqNum = (j < lengths[k])
489 ? ((int[]) seqNums.elementAt(k))[j + 1] : 23; // Sequence, or gap at the end
491 // This is a loop over r
492 for (i = 0; i < 23; i++)
496 sr = (double) BLOSUM62[i][seqNum] + 4;
498 //Calculate X with another loop over residues
499 // System.out.println("Xi " + i + " " + x[i] + " " + sr);
502 tot += (xx[i] * xx[i]);
505 bigtot += Math.sqrt(tot);
508 // This is the quality for one column
514 // bigtot = bigtot * (size-cons2[j][23])/size;
515 quality.addElement(new Double(bigtot));
517 // Need to normalize by gaps
520 double newmax = -10000;
522 for (j = start; j <= end; j++)
524 tmp = ((Double) quality.elementAt(j)).doubleValue();
525 tmp = ((max - tmp) * (size - cons2[j][23])) / size;
527 // System.out.println(tmp+ " " + j);
528 quality.setElementAt(new Double(tmp), j);
536 // System.out.println("Quality " + s);
537 qualityRange[0] = new Double(0);
538 qualityRange[1] = new Double(newmax);