X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FConservation.java;h=0fe4f5ec9d5b6092ec4bcaa2ad200753e3343007;hb=b8d09897dacc7b0ad203982b4578e2c1d8929142;hp=bfb2e004f75502a1e2e520e3fb3e92f390257069;hpb=588042b69abf8e60bcc950b24c283933c7dd422f;p=jalview.git diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index bfb2e00..0fe4f5e 100755 --- a/src/jalview/analysis/Conservation.java +++ b/src/jalview/analysis/Conservation.java @@ -20,12 +20,17 @@ package jalview.analysis; import jalview.datamodel.*; -import jalview.gui.*; - import java.util.*; -public class Conservation { +/** + * Calculates conservation values for a given set of sequences + * + * @author $author$ + * @version $Revision$ + */ +public class Conservation +{ Vector sequences; int start; int end; @@ -33,7 +38,11 @@ public class Conservation { int maxLength = 0; // used by quality calcs boolean seqNumsChanged = false; // updated after any change via calcSeqNum; Vector total = new Vector(); + + /** Stores calculated quality values */ public Vector quality; + + /** Stores maximum and minimum values of quality values */ public Double[] qualityRange = new Double[2]; String consString = ""; Sequence consSequence; @@ -42,8 +51,19 @@ public class Conservation { String name = ""; int[][] cons2; + /** + * Creates a new Conservation object. + * + * @param name Name of conservation + * @param propHash DOCUMENT ME! + * @param threshold to count the residues in residueHash(). commonly used value is 3 + * @param sequences sequences to be used in calculation + * @param start start residue position + * @param end end residue position + */ public Conservation(String name, Hashtable propHash, int threshold, - Vector sequences, int start, int end) { + Vector sequences, int start, int end) + { this.name = name; this.propHash = propHash; this.threshold = threshold; @@ -54,81 +74,115 @@ public class Conservation { calcSeqNums(); } - private void calcSeqNums() { - for (int i = 0; i < sequences.size(); i++) { + /** + * DOCUMENT ME! + */ + private void calcSeqNums() + { + for (int i = 0; i < sequences.size(); i++) + { calcSeqNum(i); } } - private void calcSeqNum(int i) { + /** + * DOCUMENT ME! + * + * @param i DOCUMENT ME! + */ + private void calcSeqNum(int i) + { String sq = null; // for dumb jbuilder not-inited exception warning int[] sqnum = null; - if ((i > -1) && (i < sequences.size())) { + if ((i > -1) && (i < sequences.size())) + { sq = ((SequenceI) sequences.elementAt(i)).getSequence(); - if (seqNums.size() <= i) { + if (seqNums.size() <= i) + { seqNums.addElement(new int[sq.length() + 1]); } - if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0]) { + if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0]) + { int j; int len; seqNumsChanged = true; sq = ((SequenceI) sequences.elementAt(i)).getSequence(); len = sq.length(); - if (maxLength < len) { + if (maxLength < len) + { maxLength = len; } sqnum = new int[len + 1]; // better to always make a new array - sequence can change its length sqnum[0] = sq.hashCode(); - for (j = 1; j <= len; j++) { + for (j = 1; j <= len; j++) + { sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(new String( sq.substring(j - 1, j)))).intValue(); // yuk } seqNums.setElementAt(sqnum, i); } - } else { + } + else + { // JBPNote INFO level debug System.err.println( "ERROR: calcSeqNum called with out of range sequence index for Alignment\n"); } } - public void calculate() { - for (int i = start; i <= end; i++) { + /** + * Calculates the conservation values for given set of sequences + */ + public void calculate() + { + for (int i = start; i <= end; i++) + { Hashtable resultHash = null; Hashtable residueHash = null; resultHash = new Hashtable(); residueHash = new Hashtable(); - for (int j = 0; j < sequences.size(); j++) { + for (int j = 0; j < sequences.size(); j++) + { // JBPNote - have to make sure elements of the sequences vector // are tested like this everywhere... - if (sequences.elementAt(j) instanceof Sequence) { + if (sequences.elementAt(j) instanceof Sequence) + { Sequence s = (Sequence) sequences.elementAt(j); - if (s.getLength() > i) { + if (s.getLength() > i) + { String res = s.getSequence().substring(i, i + 1); - if (residueHash.containsKey(res)) { + if (residueHash.containsKey(res)) + { int count = ((Integer) residueHash.get(res)).intValue(); count++; residueHash.put(res, new Integer(count)); - } else { + } + else + { residueHash.put(res, new Integer(1)); } - } else { - if (residueHash.containsKey("-")) { + } + else + { + if (residueHash.containsKey("-")) + { int count = ((Integer) residueHash.get("-")).intValue(); count++; residueHash.put("-", new Integer(count)); - } else { + } + else + { residueHash.put("-", new Integer(1)); } } @@ -141,26 +195,35 @@ public class Conservation { //loop over all the found residues Enumeration e = residueHash.keys(); - while (e.hasMoreElements()) { + while (e.hasMoreElements()) + { String res = (String) e.nextElement(); - if (((Integer) residueHash.get(res)).intValue() > thresh) { + if (((Integer) residueHash.get(res)).intValue() > thresh) + { //Now loop over the properties Enumeration e2 = propHash.keys(); - while (e2.hasMoreElements()) { + while (e2.hasMoreElements()) + { String type = (String) e2.nextElement(); Hashtable ht = (Hashtable) propHash.get(type); //Have we ticked this before? - if (!resultHash.containsKey(type)) { - if (ht.containsKey(res)) { + if (!resultHash.containsKey(type)) + { + if (ht.containsKey(res)) + { resultHash.put(type, ht.get(res)); - } else { + } + else + { resultHash.put(type, ht.get("-")); } - } else if (((Integer) resultHash.get(type)).equals( - (Integer) ht.get(res)) == false) { + } + else if (((Integer) resultHash.get(type)).equals( + (Integer) ht.get(res)) == false) + { resultHash.put(type, new Integer(-1)); } } @@ -171,40 +234,23 @@ public class Conservation { } } - public int countGaps(int j) { - int count = 0; - - for (int i = 0; i < sequences.size(); i++) { - if ((j + 1) > ((Sequence) sequences.elementAt(i)).getSequence() - .length()) { - count++; - - continue; - } - - char c = ((Sequence) sequences.elementAt(i)).getSequence().charAt(j); - - if (jalview.util.Comparison.isGap((c))) { - count++; - } - } - - return count; - } /*** * countConsNGaps * returns gap count in int[0], and conserved residue count in int[1] */ - public int[] countConsNGaps(int j) { + public int[] countConsNGaps(int j) + { int count = 0; int cons = 0; int nres = 0; int[] r = new int[2]; char f = '$'; - for (int i = 0; i < sequences.size(); i++) { - if (j >= ((Sequence) sequences.elementAt(i)).getSequence().length()) { + for (int i = 0; i < sequences.size(); i++) + { + if (j >= ((Sequence) sequences.elementAt(i)).getSequence().length()) + { count++; continue; @@ -212,15 +258,21 @@ public class Conservation { char c = ((Sequence) sequences.elementAt(i)).getSequence().charAt(j); - if (jalview.util.Comparison.isGap((c))) { + if (jalview.util.Comparison.isGap((c))) + { count++; - } else { + } + else + { nres++; - if (nres == 1) { + if (nres == 1) + { f = c; cons++; - } else if (f == c) { + } + else if (f == c) + { cons++; } } @@ -232,45 +284,64 @@ public class Conservation { return r; } - public void verdict(boolean consflag, float percentageGaps) { + /** + * Calculates the conservation sequence + * + * @param consflag if true, poitiveve conservation; false calculates negative conservation + * @param percentageGaps commonly used value is 25 + */ + public void verdict(boolean consflag, float percentageGaps) + { String consString = ""; - for (int i = start; i <= end; i++) { + for (int i = start; i <= end; i++) + { int[] gapcons = countConsNGaps(i); - boolean cons = (gapcons[0] == 1) ? true : false; int totGaps = gapcons[1]; float pgaps = ((float) totGaps * 100) / (float) sequences.size(); // System.out.println("percentage gaps = "+pgaps+"\n"); - if (percentageGaps > pgaps) { + if (percentageGaps > pgaps) + { Hashtable resultHash = (Hashtable) total.elementAt(i - start); //Now find the verdict int count = 0; Enumeration e3 = resultHash.keys(); - while (e3.hasMoreElements()) { + while (e3.hasMoreElements()) + { String type = (String) e3.nextElement(); Integer result = (Integer) resultHash.get(type); //Do we want to count +ve conservation or +ve and -ve cons.? - if (consflag) { - if (result.intValue() == 1) { + if (consflag) + { + if (result.intValue() == 1) + { count++; } - } else { - if (result.intValue() != -1) { + } + else + { + if (result.intValue() != -1) + { count++; } } } - if (count < 10) { + if (count < 10) + { consString = consString + String.valueOf(count); // Conserved props!=Identity - } else { + } + else + { consString = consString + ((gapcons[0] == 1) ? "*" : "+"); } - } else { + } + else + { consString = consString + "-"; } } @@ -278,24 +349,38 @@ public class Conservation { consSequence = new Sequence(name, consString, start, end); } - public Sequence getConsSequence() { + /** + * + * + * @return Conservation sequence + */ + public Sequence getConsSequence() + { return consSequence; } // From Alignment.java in jalview118 - public void findQuality() { + public void findQuality() + { findQuality(0, maxLength - 1); } - private void percentIdentity2() { + /** + * DOCUMENT ME! + */ + private void percentIdentity2() + { calcSeqNums(); // updates maxLength, too. - if ((cons2 == null) || seqNumsChanged) { + if ((cons2 == null) || seqNumsChanged) + { cons2 = new int[maxLength][24]; // Initialize the array - for (int j = 0; j < 24; j++) { - for (int i = 0; i < maxLength; i++) { + for (int j = 0; j < 24; j++) + { + for (int i = 0; i < maxLength; i++) + { cons2[i][j] = 0; } } @@ -303,14 +388,17 @@ public class Conservation { int[] sqnum; int j = 0; - while (j < sequences.size()) { + while (j < sequences.size()) + { sqnum = (int[]) seqNums.elementAt(j); - for (int i = 1; i < sqnum.length; i++) { + for (int i = 1; i < sqnum.length; i++) + { cons2[i - 1][sqnum[i]]++; } - for (int i = sqnum.length - 1; i < maxLength; i++) { + for (int i = sqnum.length - 1; i < maxLength; i++) + { cons2[i][23]++; // gap count } @@ -336,7 +424,14 @@ public class Conservation { } } - public void findQuality(int start, int end) { + /** + * Calculates the quality of the set of sequences + * + * @param start Start residue + * @param end End residue + */ + public void findQuality(int start, int end) + { quality = new Vector(); double max = -10000; @@ -354,21 +449,27 @@ public class Conservation { for (int l = 0; l < size; l++) lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1; - for (int j = start; j <= end; j++) { + for (int j = start; j <= end; j++) + { double bigtot = 0; // First Xr = depends on column only double[] x = new double[24]; - for (int ii = 0; ii < 24; ii++) { + for (int ii = 0; ii < 24; ii++) + { x[ii] = 0; - try { - for (int i2 = 0; i2 < 24; i2++) { + try + { + for (int i2 = 0; i2 < 24; i2++) + { x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + 4); } - } catch (Exception e) { + } + catch (Exception e) + { System.err.println("Exception during quality calculation."); e.printStackTrace(); } @@ -380,19 +481,24 @@ public class Conservation { } // Now calculate D for each position and sum - for (int k = 0; k < size; k++) { + for (int k = 0; k < size; k++) + { double tot = 0; double[] xx = new double[24]; int seqNum = (j < lengths[k]) ? ((int[]) seqNums.elementAt(k))[j + 1] : 23; // Sequence, or gap at the end // This is a loop over r - for (int i = 0; i < 23; i++) { + for (int i = 0; i < 23; i++) + { double sr = 0; - try { + try + { sr = (double) BLOSUM62[i][seqNum] + 4; - } catch (Exception e) { + } + catch (Exception e) + { System.out.println("Exception in sr: " + e); e.printStackTrace(); } @@ -408,7 +514,8 @@ public class Conservation { } // This is the quality for one column - if (max < bigtot) { + if (max < bigtot) + { max = bigtot; } @@ -422,14 +529,16 @@ public class Conservation { double newmax = -10000; - for (int j = start; j <= end; j++) { + for (int j = start; j <= end; j++) + { double tmp = ((Double) quality.elementAt(j)).doubleValue(); tmp = ((max - tmp) * (size - cons2[j][23])) / size; // System.out.println(tmp+ " " + j); quality.setElementAt(new Double(tmp), j); - if (tmp > newmax) { + if (tmp > newmax) + { newmax = tmp; } }