X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FConservation.java;h=b9b68cec25198d3bf1a175cf0efb7e1d09818bff;hb=0039639a8b6fa7a9bfc131c83a493b5a696f0104;hp=bfb2e004f75502a1e2e520e3fb3e92f390257069;hpb=588042b69abf8e60bcc950b24c283933c7dd422f;p=jalview.git diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index bfb2e00..b9b68ce 100755 --- a/src/jalview/analysis/Conservation.java +++ b/src/jalview/analysis/Conservation.java @@ -20,12 +20,17 @@ package jalview.analysis; import jalview.datamodel.*; -import jalview.gui.*; - import java.util.*; -public class Conservation { +/** + * Calculates conservation values for a given set of sequences + * + * @author $author$ + * @version $Revision$ + */ +public class Conservation +{ Vector sequences; int start; int end; @@ -33,7 +38,11 @@ public class Conservation { int maxLength = 0; // used by quality calcs boolean seqNumsChanged = false; // updated after any change via calcSeqNum; Vector total = new Vector(); + + /** Stores calculated quality values */ public Vector quality; + + /** Stores maximum and minimum values of quality values */ public Double[] qualityRange = new Double[2]; String consString = ""; Sequence consSequence; @@ -42,8 +51,19 @@ public class Conservation { String name = ""; int[][] cons2; + /** + * Creates a new Conservation object. + * + * @param name Name of conservation + * @param propHash DOCUMENT ME! + * @param threshold to count the residues in residueHash(). commonly used value is 3 + * @param sequences sequences to be used in calculation + * @param start start residue position + * @param end end residue position + */ public Conservation(String name, Hashtable propHash, int threshold, - Vector sequences, int start, int end) { + Vector sequences, int start, int end) + { this.name = name; this.propHash = propHash; this.threshold = threshold; @@ -54,81 +74,118 @@ public class Conservation { calcSeqNums(); } - private void calcSeqNums() { - for (int i = 0; i < sequences.size(); i++) { + /** + * DOCUMENT ME! + */ + private void calcSeqNums() + { + int i=0, iSize=sequences.size(); + for (i=0; i < iSize; i++) + { calcSeqNum(i); } } - private void calcSeqNum(int i) { + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + private void calcSeqNum(int i) + { String sq = null; // for dumb jbuilder not-inited exception warning int[] sqnum = null; - if ((i > -1) && (i < sequences.size())) { + if ((i > -1) && (i < sequences.size())) + { sq = ((SequenceI) sequences.elementAt(i)).getSequence(); - if (seqNums.size() <= i) { + if (seqNums.size() <= i) + { seqNums.addElement(new int[sq.length() + 1]); } - if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0]) { + if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0]) + { int j; int len; seqNumsChanged = true; sq = ((SequenceI) sequences.elementAt(i)).getSequence(); len = sq.length(); - if (maxLength < len) { + if (maxLength < len) + { maxLength = len; } sqnum = new int[len + 1]; // better to always make a new array - sequence can change its length sqnum[0] = sq.hashCode(); - for (j = 1; j <= len; j++) { - sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(new String( - sq.substring(j - 1, j)))).intValue(); // yuk + for (j = 1; j <= len; j++) + { + sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(String.valueOf( + sq.charAt(j - 1)))).intValue(); // yuk - JBPNote - case taken care of in aaHash } seqNums.setElementAt(sqnum, i); } - } else { + } + else + { // JBPNote INFO level debug System.err.println( "ERROR: calcSeqNum called with out of range sequence index for Alignment\n"); } } - public void calculate() { - for (int i = start; i <= end; i++) { - Hashtable resultHash = null; - Hashtable residueHash = null; - + /** + * Calculates the conservation values for given set of sequences + */ + public void calculate() + { + Hashtable resultHash, residueHash, ht; + int count, thresh, j, jSize = sequences.size(); + String sequence, res, type; + Enumeration enumeration, enumeration2; + + for (int i = start; i <= end; i++) + { resultHash = new Hashtable(); residueHash = new Hashtable(); - for (int j = 0; j < sequences.size(); j++) { + for (j = 0; j < jSize; j++) + { // JBPNote - have to make sure elements of the sequences vector // are tested like this everywhere... - if (sequences.elementAt(j) instanceof Sequence) { - Sequence s = (Sequence) sequences.elementAt(j); + if (sequences.elementAt(j) instanceof Sequence) + { + sequence = ((Sequence) sequences.elementAt(j)).getSequence(); - if (s.getLength() > i) { - String res = s.getSequence().substring(i, i + 1); + if (sequence.length() > i) + { + res = String.valueOf(Character.toUpperCase(sequence.charAt(i))); - if (residueHash.containsKey(res)) { - int count = ((Integer) residueHash.get(res)).intValue(); + if (residueHash.containsKey(res)) + { + count = ((Integer) residueHash.get(res)).intValue(); count++; residueHash.put(res, new Integer(count)); - } else { + } + else + { residueHash.put(res, new Integer(1)); } - } else { - if (residueHash.containsKey("-")) { - int count = ((Integer) residueHash.get("-")).intValue(); + } + else + { + if (residueHash.containsKey("-")) + { + count = ((Integer) residueHash.get("-")).intValue(); count++; residueHash.put("-", new Integer(count)); - } else { + } + else + { residueHash.put("-", new Integer(1)); } } @@ -136,31 +193,40 @@ public class Conservation { } //What is the count threshold to count the residues in residueHash() - int thresh = (threshold * (sequences.size())) / 100; + thresh = (threshold * (sequences.size())) / 100; //loop over all the found residues - Enumeration e = residueHash.keys(); + enumeration = residueHash.keys(); - while (e.hasMoreElements()) { - String res = (String) e.nextElement(); + while (enumeration.hasMoreElements()) + { + res = (String) enumeration.nextElement(); - if (((Integer) residueHash.get(res)).intValue() > thresh) { + if (((Integer) residueHash.get(res)).intValue() > thresh) + { //Now loop over the properties - Enumeration e2 = propHash.keys(); + enumeration2 = propHash.keys(); - while (e2.hasMoreElements()) { - String type = (String) e2.nextElement(); - Hashtable ht = (Hashtable) propHash.get(type); + while (enumeration2.hasMoreElements()) + { + type = (String) enumeration2.nextElement(); + ht = (Hashtable) propHash.get(type); //Have we ticked this before? - if (!resultHash.containsKey(type)) { - if (ht.containsKey(res)) { + if (!resultHash.containsKey(type)) + { + if (ht.containsKey(res)) + { resultHash.put(type, ht.get(res)); - } else { + } + else + { resultHash.put(type, ht.get("-")); } - } else if (((Integer) resultHash.get(type)).equals( - (Integer) ht.get(res)) == false) { + } + else if (((Integer) resultHash.get(type)).equals( + (Integer) ht.get(res)) == false) + { resultHash.put(type, new Integer(-1)); } } @@ -171,56 +237,46 @@ public class Conservation { } } - public int countGaps(int j) { - int count = 0; - - for (int i = 0; i < sequences.size(); i++) { - if ((j + 1) > ((Sequence) sequences.elementAt(i)).getSequence() - .length()) { - count++; - - continue; - } - - char c = ((Sequence) sequences.elementAt(i)).getSequence().charAt(j); - - if (jalview.util.Comparison.isGap((c))) { - count++; - } - } - - return count; - } /*** * countConsNGaps * returns gap count in int[0], and conserved residue count in int[1] */ - public int[] countConsNGaps(int j) { + public int[] countConsNGaps(int j) + { int count = 0; int cons = 0; int nres = 0; int[] r = new int[2]; char f = '$'; + int i, iSize = sequences.size(); + char c; - for (int i = 0; i < sequences.size(); i++) { - if (j >= ((Sequence) sequences.elementAt(i)).getSequence().length()) { + for (i = 0; i < iSize; i++) + { + if (j >= ((Sequence) sequences.elementAt(i)).getLength()) + { count++; - continue; } - char c = ((Sequence) sequences.elementAt(i)).getSequence().charAt(j); + c = ((Sequence) sequences.elementAt(i)).getCharAt(j); // gaps do not have upper/lower case - if (jalview.util.Comparison.isGap((c))) { + if (jalview.util.Comparison.isGap((c))) + { count++; - } else { + } + else + { nres++; - if (nres == 1) { + if (nres == 1) + { f = c; cons++; - } else if (f == c) { + } + else if (f == c) + { cons++; } } @@ -232,70 +288,110 @@ public class Conservation { return r; } - public void verdict(boolean consflag, float percentageGaps) { - String consString = ""; - - for (int i = start; i <= end; i++) { - int[] gapcons = countConsNGaps(i); - boolean cons = (gapcons[0] == 1) ? true : false; - int totGaps = gapcons[1]; - float pgaps = ((float) totGaps * 100) / (float) sequences.size(); - - // System.out.println("percentage gaps = "+pgaps+"\n"); - if (percentageGaps > pgaps) { - Hashtable resultHash = (Hashtable) total.elementAt(i - start); + /** + * Calculates the conservation sequence + * + * @param consflag if true, poitiveve conservation; false calculates negative conservation + * @param percentageGaps commonly used value is 25 + */ + public void verdict(boolean consflag, float percentageGaps) + { + StringBuffer consString = new StringBuffer(); + String type; + Integer result; + int[] gapcons; + int totGaps, count; + float pgaps; + Hashtable resultHash ; + Enumeration enumeration; + + + for (int i = start; i <= end; i++) + { + gapcons = countConsNGaps(i); + totGaps = gapcons[1]; + pgaps = ((float) totGaps * 100) / (float) sequences.size(); + + if (percentageGaps > pgaps) + { + resultHash = (Hashtable) total.elementAt(i - start); //Now find the verdict - int count = 0; - Enumeration e3 = resultHash.keys(); + count = 0; + enumeration = resultHash.keys(); - while (e3.hasMoreElements()) { - String type = (String) e3.nextElement(); - Integer result = (Integer) resultHash.get(type); + while (enumeration.hasMoreElements()) + { + type = (String) enumeration.nextElement(); + result = (Integer) resultHash.get(type); //Do we want to count +ve conservation or +ve and -ve cons.? - if (consflag) { - if (result.intValue() == 1) { + if (consflag) + { + if (result.intValue() == 1) + { count++; } - } else { - if (result.intValue() != -1) { + } + else + { + if (result.intValue() != -1) + { count++; } } } - if (count < 10) { - consString = consString + String.valueOf(count); // Conserved props!=Identity - } else { - consString = consString + ((gapcons[0] == 1) ? "*" : "+"); + if (count < 10) + { + consString.append(count); // Conserved props!=Identity + } + else + { + consString.append((gapcons[0] == 1) ? "*" : "+"); } - } else { - consString = consString + "-"; + } + else + { + consString.append("-"); } } - consSequence = new Sequence(name, consString, start, end); + consSequence = new Sequence(name, consString.toString(), start, end); } - public Sequence getConsSequence() { + /** + * + * + * @return Conservation sequence + */ + public Sequence getConsSequence() + { return consSequence; } // From Alignment.java in jalview118 - public void findQuality() { + public void findQuality() + { findQuality(0, maxLength - 1); } - private void percentIdentity2() { + /** + * DOCUMENT ME! + */ + private void percentIdentity2() + { calcSeqNums(); // updates maxLength, too. - if ((cons2 == null) || seqNumsChanged) { + if ((cons2 == null) || seqNumsChanged) + { cons2 = new int[maxLength][24]; // Initialize the array - for (int j = 0; j < 24; j++) { - for (int i = 0; i < maxLength; i++) { + for (int j = 0; j < 24; j++) + { + for (int i = 0; i < maxLength; i++) + { cons2[i][j] = 0; } } @@ -303,14 +399,17 @@ public class Conservation { int[] sqnum; int j = 0; - while (j < sequences.size()) { + while (j < sequences.size()) + { sqnum = (int[]) seqNums.elementAt(j); - for (int i = 1; i < sqnum.length; i++) { + for (int i = 1; i < sqnum.length; i++) + { cons2[i - 1][sqnum[i]]++; } - for (int i = sqnum.length - 1; i < maxLength; i++) { + for (int i = sqnum.length - 1; i < maxLength; i++) + { cons2[i][23]++; // gap count } @@ -336,11 +435,17 @@ public class Conservation { } } - public void findQuality(int start, int end) { + /** + * Calculates the quality of the set of sequences + * + * @param start Start residue + * @param end End residue + */ + public void findQuality(int start, int end) + { quality = new Vector(); double max = -10000; - String s = ""; int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62(); //Loop over columns // JBPNote Profiling info @@ -350,25 +455,35 @@ public class Conservation { int size = seqNums.size(); int[] lengths = new int[size]; + double tot, bigtot, sr, tmp; + double [] x, xx; + int l, j, i, ii, seqNum; - for (int l = 0; l < size; l++) + for (l = 0; l < size; l++) lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1; - for (int j = start; j <= end; j++) { - double bigtot = 0; + + for (j = start; j <= end; j++) + { + bigtot = 0; // First Xr = depends on column only - double[] x = new double[24]; + x = new double[24]; - for (int ii = 0; ii < 24; ii++) { + for (ii = 0; ii < 24; ii++) + { x[ii] = 0; - try { - for (int i2 = 0; i2 < 24; i2++) { + try + { + for (int i2 = 0; i2 < 24; i2++) + { x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + 4); } - } catch (Exception e) { + } + catch (Exception e) + { System.err.println("Exception during quality calculation."); e.printStackTrace(); } @@ -380,19 +495,24 @@ public class Conservation { } // Now calculate D for each position and sum - for (int k = 0; k < size; k++) { - double tot = 0; - double[] xx = new double[24]; - int seqNum = (j < lengths[k]) + for (int k = 0; k < size; k++) + { + tot = 0; + xx = new double[24]; + seqNum = (j < lengths[k]) ? ((int[]) seqNums.elementAt(k))[j + 1] : 23; // Sequence, or gap at the end // This is a loop over r - for (int i = 0; i < 23; i++) { - double sr = 0; + for (i = 0; i < 23; i++) + { + sr = 0; - try { + try + { sr = (double) BLOSUM62[i][seqNum] + 4; - } catch (Exception e) { + } + catch (Exception e) + { System.out.println("Exception in sr: " + e); e.printStackTrace(); } @@ -408,28 +528,30 @@ public class Conservation { } // This is the quality for one column - if (max < bigtot) { + if (max < bigtot) + { max = bigtot; } // bigtot = bigtot * (size-cons2[j][23])/size; quality.addElement(new Double(bigtot)); - s += "-"; // Need to normalize by gaps } double newmax = -10000; - for (int j = start; j <= end; j++) { - double tmp = ((Double) quality.elementAt(j)).doubleValue(); + for (j = start; j <= end; j++) + { + tmp = ((Double) quality.elementAt(j)).doubleValue(); tmp = ((max - tmp) * (size - cons2[j][23])) / size; // System.out.println(tmp+ " " + j); quality.setElementAt(new Double(tmp), j); - if (tmp > newmax) { + if (tmp > newmax) + { newmax = tmp; } }