/* * Jalview - A Sequence Alignment Editor and Viewer * Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ package jalview.analysis; import jalview.datamodel.*; import java.util.*; /** * Calculates conservation values for a given set of sequences * * @author $author$ * @version $Revision$ */ public class Conservation { Vector sequences; int start; int end; Vector seqNums; // vector of int vectors where first is sequence checksum int maxLength = 0; // used by quality calcs boolean seqNumsChanged = false; // updated after any change via calcSeqNum; Vector total = new Vector(); /** Stores calculated quality values */ public Vector quality; /** Stores maximum and minimum values of quality values */ public Double[] qualityRange = new Double[2]; String consString = ""; Sequence consSequence; Hashtable propHash; int threshold; String name = ""; int[][] cons2; /** * Creates a new Conservation object. * * @param name Name of conservation * @param propHash DOCUMENT ME! * @param threshold to count the residues in residueHash(). commonly used value is 3 * @param sequences sequences to be used in calculation * @param start start residue position * @param end end residue position */ public Conservation(String name, Hashtable propHash, int threshold, Vector sequences, int start, int end) { this.name = name; this.propHash = propHash; this.threshold = threshold; this.sequences = sequences; this.start = start; this.end = end; seqNums = new Vector(sequences.size()); calcSeqNums(); } /** * DOCUMENT ME! */ private void calcSeqNums() { int i=0, iSize=sequences.size(); for (i=0; i < iSize; i++) { calcSeqNum(i); } } /** * DOCUMENT ME! * * @param i DOCUMENT ME! */ private void calcSeqNum(int i) { String sq = null; // for dumb jbuilder not-inited exception warning int[] sqnum = null; if ((i > -1) && (i < sequences.size())) { sq = ((SequenceI) sequences.elementAt(i)).getSequence(); if (seqNums.size() <= i) { seqNums.addElement(new int[sq.length() + 1]); } if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0]) { int j; int len; seqNumsChanged = true; sq = ((SequenceI) sequences.elementAt(i)).getSequence(); len = sq.length(); if (maxLength < len) { maxLength = len; } sqnum = new int[len + 1]; // better to always make a new array - sequence can change its length sqnum[0] = sq.hashCode(); for (j = 1; j <= len; j++) { sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(Character.toString( sq.charAt(j - 1)))).intValue(); // yuk - JBPNote - case taken care of in aaHash } seqNums.setElementAt(sqnum, i); } } else { // JBPNote INFO level debug System.err.println( "ERROR: calcSeqNum called with out of range sequence index for Alignment\n"); } } /** * Calculates the conservation values for given set of sequences */ public void calculate() { Hashtable resultHash, residueHash, ht; int count, thresh, j, jSize = sequences.size(); String sequence, res, type; Enumeration enumeration, enumeration2; for (int i = start; i <= end; i++) { resultHash = new Hashtable(); residueHash = new Hashtable(); for (j = 0; j < jSize; j++) { // JBPNote - have to make sure elements of the sequences vector // are tested like this everywhere... if (sequences.elementAt(j) instanceof Sequence) { sequence = ((Sequence) sequences.elementAt(j)).getSequence(); if (sequence.length() > i) { res = Character.toString(Character.toUpperCase(sequence.charAt(i))); if (residueHash.containsKey(res)) { count = ((Integer) residueHash.get(res)).intValue(); count++; residueHash.put(res, new Integer(count)); } else { residueHash.put(res, new Integer(1)); } } else { if (residueHash.containsKey("-")) { count = ((Integer) residueHash.get("-")).intValue(); count++; residueHash.put("-", new Integer(count)); } else { residueHash.put("-", new Integer(1)); } } } } //What is the count threshold to count the residues in residueHash() thresh = (threshold * (sequences.size())) / 100; //loop over all the found residues enumeration = residueHash.keys(); while (enumeration.hasMoreElements()) { res = (String) enumeration.nextElement(); if (((Integer) residueHash.get(res)).intValue() > thresh) { //Now loop over the properties enumeration2 = propHash.keys(); while (enumeration2.hasMoreElements()) { type = (String) enumeration2.nextElement(); ht = (Hashtable) propHash.get(type); //Have we ticked this before? if (!resultHash.containsKey(type)) { if (ht.containsKey(res)) { resultHash.put(type, ht.get(res)); } else { resultHash.put(type, ht.get("-")); } } else if (((Integer) resultHash.get(type)).equals( (Integer) ht.get(res)) == false) { resultHash.put(type, new Integer(-1)); } } } } total.addElement(resultHash); } } /*** * countConsNGaps * returns gap count in int[0], and conserved residue count in int[1] */ public int[] countConsNGaps(int j) { int count = 0; int cons = 0; int nres = 0; int[] r = new int[2]; char f = '$'; int i, iSize = sequences.size(); char c; for (i = 0; i < iSize; i++) { if (j >= ((Sequence) sequences.elementAt(i)).getLength()) { count++; continue; } c = ((Sequence) sequences.elementAt(i)).getCharAt(j); // gaps do not have upper/lower case if (jalview.util.Comparison.isGap((c))) { count++; } else { nres++; if (nres == 1) { f = c; cons++; } else if (f == c) { cons++; } } } r[0] = (nres == cons) ? 1 : 0; r[1] = count; return r; } /** * Calculates the conservation sequence * * @param consflag if true, poitiveve conservation; false calculates negative conservation * @param percentageGaps commonly used value is 25 */ public void verdict(boolean consflag, float percentageGaps) { StringBuffer consString = new StringBuffer(); String type; Integer result; int[] gapcons; int totGaps, count; float pgaps; Hashtable resultHash ; Enumeration enumeration; for (int i = start; i <= end; i++) { gapcons = countConsNGaps(i); totGaps = gapcons[1]; pgaps = ((float) totGaps * 100) / (float) sequences.size(); if (percentageGaps > pgaps) { resultHash = (Hashtable) total.elementAt(i - start); //Now find the verdict count = 0; enumeration = resultHash.keys(); while (enumeration.hasMoreElements()) { type = (String) enumeration.nextElement(); result = (Integer) resultHash.get(type); //Do we want to count +ve conservation or +ve and -ve cons.? if (consflag) { if (result.intValue() == 1) { count++; } } else { if (result.intValue() != -1) { count++; } } } if (count < 10) { consString.append(count); // Conserved props!=Identity } else { consString.append((gapcons[0] == 1) ? "*" : "+"); } } else { consString.append("-"); } } consSequence = new Sequence(name, consString.toString(), start, end); } /** * * * @return Conservation sequence */ public Sequence getConsSequence() { return consSequence; } // From Alignment.java in jalview118 public void findQuality() { findQuality(0, maxLength - 1); } /** * DOCUMENT ME! */ private void percentIdentity2() { calcSeqNums(); // updates maxLength, too. if ((cons2 == null) || seqNumsChanged) { cons2 = new int[maxLength][24]; // Initialize the array for (int j = 0; j < 24; j++) { for (int i = 0; i < maxLength; i++) { cons2[i][j] = 0; } } int[] sqnum; int j = 0; while (j < sequences.size()) { sqnum = (int[]) seqNums.elementAt(j); for (int i = 1; i < sqnum.length; i++) { cons2[i - 1][sqnum[i]]++; } for (int i = sqnum.length - 1; i < maxLength; i++) { cons2[i][23]++; // gap count } j++; } // unnecessary ? /* for (int i=start; i <= end; i++) { int max = -1000; int maxi = -1; int maxj = -1; for (int j=0;j<24;j++) { if (cons2[i][j] > max) { max = cons2[i][j]; maxi = i; maxj = j; } } } */ } } /** * Calculates the quality of the set of sequences * * @param start Start residue * @param end End residue */ public void findQuality(int start, int end) { quality = new Vector(); double max = -10000; int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62(); //Loop over columns // JBPNote Profiling info // long ts = System.currentTimeMillis(); //long te = System.currentTimeMillis(); percentIdentity2(); int size = seqNums.size(); int[] lengths = new int[size]; double tot, bigtot, sr, tmp; double [] x, xx; int l, j, i, ii, seqNum; for (l = 0; l < size; l++) lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1; for (j = start; j <= end; j++) { bigtot = 0; // First Xr = depends on column only x = new double[24]; for (ii = 0; ii < 24; ii++) { x[ii] = 0; try { for (int i2 = 0; i2 < 24; i2++) { x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + 4); } } catch (Exception e) { System.err.println("Exception during quality calculation."); e.printStackTrace(); } //System.out.println("X " + ii + " " + x[ii]); x[ii] /= (size); //System.out.println("X " + ii + " " + x[ii]); } // Now calculate D for each position and sum for (int k = 0; k < size; k++) { tot = 0; xx = new double[24]; seqNum = (j < lengths[k]) ? ((int[]) seqNums.elementAt(k))[j + 1] : 23; // Sequence, or gap at the end // This is a loop over r for (i = 0; i < 23; i++) { sr = 0; try { sr = (double) BLOSUM62[i][seqNum] + 4; } catch (Exception e) { System.out.println("Exception in sr: " + e); e.printStackTrace(); } //Calculate X with another loop over residues // System.out.println("Xi " + i + " " + x[i] + " " + sr); xx[i] = x[i] - sr; tot += (xx[i] * xx[i]); } bigtot += Math.sqrt(tot); } // This is the quality for one column if (max < bigtot) { max = bigtot; } // bigtot = bigtot * (size-cons2[j][23])/size; quality.addElement(new Double(bigtot)); // Need to normalize by gaps } double newmax = -10000; for (j = start; j <= end; j++) { tmp = ((Double) quality.elementAt(j)).doubleValue(); tmp = ((max - tmp) * (size - cons2[j][23])) / size; // System.out.println(tmp+ " " + j); quality.setElementAt(new Double(tmp), j); if (tmp > newmax) { newmax = tmp; } } // System.out.println("Quality " + s); qualityRange[0] = new Double(0); qualityRange[1] = new Double(newmax); } }