X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=inline;f=src%2Fjalview%2Fanalysis%2FConservation.java;h=d1b508b57a6bd893d67352653569a8b572121634;hb=49b625d7c10b85425059a51706a2aca96deed955;hp=0fe4f5ec9d5b6092ec4bcaa2ad200753e3343007;hpb=efc31b4a8d5cee63555586804a2b79c06bdb5a14;p=jalview.git diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index 0fe4f5e..d1b508b 100755 --- a/src/jalview/analysis/Conservation.java +++ b/src/jalview/analysis/Conservation.java @@ -1,6 +1,6 @@ /* * Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle +* Copyright (C) 2006 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -31,13 +31,13 @@ import java.util.*; */ public class Conservation { - Vector sequences; + SequenceI [] sequences; int start; int end; Vector seqNums; // vector of int vectors where first is sequence checksum int maxLength = 0; // used by quality calcs boolean seqNumsChanged = false; // updated after any change via calcSeqNum; - Vector total = new Vector(); + Hashtable [] total; /** Stores calculated quality values */ public Vector quality; @@ -64,27 +64,27 @@ public class Conservation public Conservation(String name, Hashtable propHash, int threshold, Vector sequences, int start, int end) { + this.name = name; this.propHash = propHash; this.threshold = threshold; - this.sequences = sequences; this.start = start; this.end = end; - seqNums = new Vector(sequences.size()); - calcSeqNums(); - } - /** - * DOCUMENT ME! - */ - private void calcSeqNums() - { - for (int i = 0; i < sequences.size(); i++) + + int s, sSize = sequences.size(); + SequenceI[] sarray = new SequenceI[sSize]; + this.sequences = sarray; + + for (s = 0; s < sSize; s++) { - calcSeqNum(i); + sarray[s] = (SequenceI) sequences.elementAt(s); + if(sarray[s].getLength()>maxLength) + maxLength = sarray[s].getLength(); } } + /** * DOCUMENT ME! * @@ -95,9 +95,11 @@ public class Conservation String sq = null; // for dumb jbuilder not-inited exception warning int[] sqnum = null; - if ((i > -1) && (i < sequences.size())) + int sSize = sequences.length; + + if ((i > -1) && (i < sSize)) { - sq = ((SequenceI) sequences.elementAt(i)).getSequence(); + sq = sequences[i].getSequence(); if (seqNums.size() <= i) { @@ -109,7 +111,6 @@ public class Conservation int j; int len; seqNumsChanged = true; - sq = ((SequenceI) sequences.elementAt(i)).getSequence(); len = sq.length(); if (maxLength < len) @@ -122,12 +123,14 @@ public class Conservation for (j = 1; j <= len; j++) { - sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(new String( - sq.substring(j - 1, j)))).intValue(); // yuk + sqnum[j] = jalview.schemes.ResidueProperties.aaIndex[sq.charAt(j-1)]; } + seqNums.setElementAt(sqnum, i); } + else + System.out.println("NEVER THE EXCEPTION"); } else { @@ -142,72 +145,61 @@ public class Conservation */ public void calculate() { - for (int i = start; i <= end; i++) - { - Hashtable resultHash = null; - Hashtable residueHash = null; + Hashtable resultHash, ht; + int thresh, j, jSize = sequences.length; + int[] values; // Replaces residueHash + String type, res=null; + char c; + Enumeration enumeration2; - resultHash = new Hashtable(); - residueHash = new Hashtable(); + total = new Hashtable[maxLength]; + + for (int i = start; i <= end; i++) + { + values = new int[132]; - for (int j = 0; j < sequences.size(); j++) + for (j = 0; j < jSize; j++) { - // JBPNote - have to make sure elements of the sequences vector - // are tested like this everywhere... - if (sequences.elementAt(j) instanceof Sequence) - { - Sequence s = (Sequence) sequences.elementAt(j); + if (sequences[j].getLength() > i) + { + c = sequences[j].getCharAt(i); - if (s.getLength() > i) - { - String res = s.getSequence().substring(i, i + 1); + // No need to check if its a '-' + if (c == '.' || c == ' ') + c = '-'; - if (residueHash.containsKey(res)) - { - int count = ((Integer) residueHash.get(res)).intValue(); - count++; - residueHash.put(res, new Integer(count)); - } - else - { - residueHash.put(res, new Integer(1)); - } - } - else - { - if (residueHash.containsKey("-")) - { - int count = ((Integer) residueHash.get("-")).intValue(); - count++; - residueHash.put("-", new Integer(count)); - } - else - { - residueHash.put("-", new Integer(1)); - } - } + if ('a' <= c && c <= 'z') + { + c -= (32);// 32 = 'a' - 'A' } + + values[c]++; + } + else + { + values['-']++; + } } //What is the count threshold to count the residues in residueHash() - int thresh = (threshold * (sequences.size())) / 100; + thresh = (threshold * (jSize)) / 100; //loop over all the found residues - Enumeration e = residueHash.keys(); - - while (e.hasMoreElements()) + resultHash = new Hashtable(); + for (int v = '-'; v < 'Z'; v++) { - String res = (String) e.nextElement(); - if (((Integer) residueHash.get(res)).intValue() > thresh) + if (values[v] > thresh) { + res = String.valueOf( (char) v); + //Now loop over the properties - Enumeration e2 = propHash.keys(); + enumeration2 = propHash.keys(); - while (e2.hasMoreElements()) + while (enumeration2.hasMoreElements()) { - String type = (String) e2.nextElement(); - Hashtable ht = (Hashtable) propHash.get(type); + type = (String) enumeration2.nextElement(); + ht = (Hashtable) propHash.get(type); //Have we ticked this before? if (!resultHash.containsKey(type)) @@ -230,7 +222,7 @@ public class Conservation } } - total.addElement(resultHash); + total[i] = resultHash; } } @@ -246,17 +238,18 @@ public class Conservation int nres = 0; int[] r = new int[2]; char f = '$'; + int i, iSize = sequences.length; + char c; - for (int i = 0; i < sequences.size(); i++) + for (i = 0; i < iSize; i++) { - if (j >= ((Sequence) sequences.elementAt(i)).getSequence().length()) + if (j >= sequences[i].getLength()) { count++; - continue; } - char c = ((Sequence) sequences.elementAt(i)).getSequence().charAt(j); + c = sequences[i].getCharAt(j); // gaps do not have upper/lower case if (jalview.util.Comparison.isGap((c))) { @@ -292,27 +285,34 @@ public class Conservation */ public void verdict(boolean consflag, float percentageGaps) { - String consString = ""; + StringBuffer consString = new StringBuffer(); + String type; + Integer result; + int[] gapcons; + int totGaps, count; + float pgaps; + Hashtable resultHash ; + Enumeration enumeration; + for (int i = start; i <= end; i++) { - int[] gapcons = countConsNGaps(i); - int totGaps = gapcons[1]; - float pgaps = ((float) totGaps * 100) / (float) sequences.size(); + gapcons = countConsNGaps(i); + totGaps = gapcons[1]; + pgaps = ((float) totGaps * 100) / (float) sequences.length; - // System.out.println("percentage gaps = "+pgaps+"\n"); if (percentageGaps > pgaps) { - Hashtable resultHash = (Hashtable) total.elementAt(i - start); + resultHash = total[i - start]; //Now find the verdict - int count = 0; - Enumeration e3 = resultHash.keys(); + count = 0; + enumeration = resultHash.keys(); - while (e3.hasMoreElements()) + while (enumeration.hasMoreElements()) { - String type = (String) e3.nextElement(); - Integer result = (Integer) resultHash.get(type); + type = (String) enumeration.nextElement(); + result = (Integer) resultHash.get(type); //Do we want to count +ve conservation or +ve and -ve cons.? if (consflag) @@ -333,20 +333,20 @@ public class Conservation if (count < 10) { - consString = consString + String.valueOf(count); // Conserved props!=Identity + consString.append(count); // Conserved props!=Identity } else { - consString = consString + ((gapcons[0] == 1) ? "*" : "+"); + consString.append((gapcons[0] == 1) ? "*" : "+"); } } else { - consString = consString + "-"; + consString.append("-"); } } - consSequence = new Sequence(name, consString, start, end); + consSequence = new Sequence(name, consString.toString(), start, end); } /** @@ -370,7 +370,15 @@ public class Conservation */ private void percentIdentity2() { - calcSeqNums(); // updates maxLength, too. + seqNums = new Vector(); + // calcSeqNum(s); + int i = 0, iSize = sequences.length; + //Do we need to calculate this again? + for (i = 0; i < iSize; i++) + { + calcSeqNum(i); + } + if ((cons2 == null) || seqNumsChanged) { @@ -379,7 +387,7 @@ public class Conservation // Initialize the array for (int j = 0; j < 24; j++) { - for (int i = 0; i < maxLength; i++) + for (i = 0; i < maxLength; i++) { cons2[i][j] = 0; } @@ -388,16 +396,16 @@ public class Conservation int[] sqnum; int j = 0; - while (j < sequences.size()) + while (j < sequences.length) { sqnum = (int[]) seqNums.elementAt(j); - for (int i = 1; i < sqnum.length; i++) + for (i = 1; i < sqnum.length; i++) { cons2[i - 1][sqnum[i]]++; } - for (int i = sqnum.length - 1; i < maxLength; i++) + for (i = sqnum.length - 1; i < maxLength; i++) { cons2[i][23]++; // gap count } @@ -435,73 +443,56 @@ public class Conservation quality = new Vector(); double max = -10000; - String s = ""; int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62(); //Loop over columns // JBPNote Profiling info - // long ts = System.currentTimeMillis(); + //long ts = System.currentTimeMillis(); //long te = System.currentTimeMillis(); percentIdentity2(); int size = seqNums.size(); int[] lengths = new int[size]; + double tot, bigtot, sr, tmp; + double [] x, xx; + int l, j, i, ii, i2, k, seqNum; - for (int l = 0; l < size; l++) + for (l = 0; l < size; l++) lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1; - for (int j = start; j <= end; j++) + for (j = start; j <= end; j++) { - double bigtot = 0; + bigtot = 0; // First Xr = depends on column only - double[] x = new double[24]; + x = new double[24]; - for (int ii = 0; ii < 24; ii++) + for (ii = 0; ii < 24; ii++) { x[ii] = 0; - try - { - for (int i2 = 0; i2 < 24; i2++) - { - x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) + - 4); - } - } - catch (Exception e) + for (i2 = 0; i2 < 24; i2++) { - System.err.println("Exception during quality calculation."); - e.printStackTrace(); + x[ii] += ( ( (double) cons2[j][i2] * BLOSUM62[ii][i2]) + + 4); } - //System.out.println("X " + ii + " " + x[ii]); - x[ii] /= (size); - - //System.out.println("X " + ii + " " + x[ii]); + x[ii] /= size; } // Now calculate D for each position and sum - for (int k = 0; k < size; k++) + for (k = 0; k < size; k++) { - double tot = 0; - double[] xx = new double[24]; - int seqNum = (j < lengths[k]) + tot = 0; + xx = new double[24]; + seqNum = (j < lengths[k]) ? ((int[]) seqNums.elementAt(k))[j + 1] : 23; // Sequence, or gap at the end // This is a loop over r - for (int i = 0; i < 23; i++) + for (i = 0; i < 23; i++) { - double sr = 0; + sr = 0; - try - { - sr = (double) BLOSUM62[i][seqNum] + 4; - } - catch (Exception e) - { - System.out.println("Exception in sr: " + e); - e.printStackTrace(); - } + sr = (double) BLOSUM62[i][seqNum] + 4; //Calculate X with another loop over residues // System.out.println("Xi " + i + " " + x[i] + " " + sr); @@ -522,16 +513,14 @@ public class Conservation // bigtot = bigtot * (size-cons2[j][23])/size; quality.addElement(new Double(bigtot)); - s += "-"; - // Need to normalize by gaps } double newmax = -10000; - for (int j = start; j <= end; j++) + for (j = start; j <= end; j++) { - double tmp = ((Double) quality.elementAt(j)).doubleValue(); + tmp = ((Double) quality.elementAt(j)).doubleValue(); tmp = ((max - tmp) * (size - cons2[j][23])) / size; // System.out.println(tmp+ " " + j);