-/* Jalview - a java multiple alignment editor\r
- * Copyright (C) 1998 Michele Clamp\r
- *\r
- * This program is free software; you can redistribute it and/or\r
- * modify it under the terms of the GNU General Public License\r
- * as published by the Free Software Foundation; either version 2\r
- * of the License, or (at your option) any later version.\r
- *\r
- * This program is distributed in the hope that it will be useful,\r
- * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
- * GNU General Public License for more details.\r
- *\r
- * You should have received a copy of the GNU General Public License\r
- * along with this program; if not, write to the Free Software\r
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\r
- */\r
+/*\r
+* Jalview - A Sequence Alignment Editor and Viewer\r
+* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle\r
+*\r
+* This program is free software; you can redistribute it and/or\r
+* modify it under the terms of the GNU General Public License\r
+* as published by the Free Software Foundation; either version 2\r
+* of the License, or (at your option) any later version.\r
+*\r
+* This program is distributed in the hope that it will be useful,\r
+* but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+* GNU General Public License for more details.\r
+*\r
+* You should have received a copy of the GNU General Public License\r
+* along with this program; if not, write to the Free Software\r
+* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA\r
+*/\r
package jalview.analysis;\r
\r
+import jalview.datamodel.*;\r
\r
import java.util.*;\r
-import jalview.gui.*;\r
-import jalview.datamodel.*;\r
\r
\r
-public class Conservation {\r
- Vector sequences;\r
- int start;\r
- int end;\r
+/**\r
+ * Calculates conservation values for a given set of sequences\r
+ *\r
+ * @author $author$\r
+ * @version $Revision$\r
+ */\r
+public class Conservation\r
+{\r
+ Vector sequences;\r
+ int start;\r
+ int end;\r
+ Vector seqNums; // vector of int vectors where first is sequence checksum\r
+ int maxLength = 0; // used by quality calcs\r
+ boolean seqNumsChanged = false; // updated after any change via calcSeqNum;\r
+ Vector total = new Vector();\r
+\r
+ /** Stores calculated quality values */\r
+ public Vector quality;\r
+\r
+ /** Stores maximum and minimum values of quality values */\r
+ public Double[] qualityRange = new Double[2];\r
+ String consString = "";\r
+ Sequence consSequence;\r
+ Hashtable propHash;\r
+ int threshold;\r
+ String name = "";\r
+ int[][] cons2;\r
+\r
+ /**\r
+ * Creates a new Conservation object.\r
+ *\r
+ * @param name Name of conservation\r
+ * @param propHash DOCUMENT ME!\r
+ * @param threshold to count the residues in residueHash(). commonly used value is 3\r
+ * @param sequences sequences to be used in calculation\r
+ * @param start start residue position\r
+ * @param end end residue position\r
+ */\r
+ public Conservation(String name, Hashtable propHash, int threshold,\r
+ Vector sequences, int start, int end)\r
+ {\r
+ this.name = name;\r
+ this.propHash = propHash;\r
+ this.threshold = threshold;\r
+ this.sequences = sequences;\r
+ this.start = start;\r
+ this.end = end;\r
+ seqNums = new Vector(sequences.size());\r
+ calcSeqNums();\r
+ }\r
\r
- Vector total = new Vector();\r
+ /**\r
+ * DOCUMENT ME!\r
+ */\r
+ private void calcSeqNums()\r
+ {\r
+ for (int i = 0; i < sequences.size(); i++)\r
+ {\r
+ calcSeqNum(i);\r
+ }\r
+ }\r
\r
- String consString = "";\r
+ /**\r
+ * DOCUMENT ME!\r
+ *\r
+ * @param i DOCUMENT ME!\r
+ */\r
+ private void calcSeqNum(int i)\r
+ {\r
+ String sq = null; // for dumb jbuilder not-inited exception warning\r
+ int[] sqnum = null;\r
+\r
+ if ((i > -1) && (i < sequences.size()))\r
+ {\r
+ sq = ((SequenceI) sequences.elementAt(i)).getSequence();\r
+\r
+ if (seqNums.size() <= i)\r
+ {\r
+ seqNums.addElement(new int[sq.length() + 1]);\r
+ }\r
\r
- DrawableSequence consSequence;\r
- Hashtable propHash;\r
- int threshold;\r
- Hashtable[] freqs;\r
+ if (sq.hashCode() != ((int[]) seqNums.elementAt(i))[0])\r
+ {\r
+ int j;\r
+ int len;\r
+ seqNumsChanged = true;\r
+ sq = ((SequenceI) sequences.elementAt(i)).getSequence();\r
+ len = sq.length();\r
+\r
+ if (maxLength < len)\r
+ {\r
+ maxLength = len;\r
+ }\r
+\r
+ sqnum = new int[len + 1]; // better to always make a new array - sequence can change its length\r
+ sqnum[0] = sq.hashCode();\r
+\r
+ for (j = 1; j <= len; j++)\r
+ {\r
+ sqnum[j] = ((Integer) jalview.schemes.ResidueProperties.aaHash.get(new String(\r
+ sq.substring(j - 1, j)))).intValue(); // yuk\r
+ }\r
+\r
+ seqNums.setElementAt(sqnum, i);\r
+ }\r
+ }\r
+ else\r
+ {\r
+ // JBPNote INFO level debug\r
+ System.err.println(\r
+ "ERROR: calcSeqNum called with out of range sequence index for Alignment\n");\r
+ }\r
+ }\r
\r
- String name = "";\r
+ /**\r
+ * Calculates the conservation values for given set of sequences\r
+ */\r
+ public void calculate()\r
+ {\r
+ for (int i = start; i <= end; i++)\r
+ {\r
+ Hashtable resultHash = null;\r
+ Hashtable residueHash = null;\r
+\r
+ resultHash = new Hashtable();\r
+ residueHash = new Hashtable();\r
+\r
+ for (int j = 0; j < sequences.size(); j++)\r
+ {\r
+ // JBPNote - have to make sure elements of the sequences vector\r
+ // are tested like this everywhere...\r
+ if (sequences.elementAt(j) instanceof Sequence)\r
+ {\r
+ Sequence s = (Sequence) sequences.elementAt(j);\r
+\r
+ if (s.getLength() > i)\r
+ {\r
+ String res = s.getSequence().substring(i, i + 1);\r
+\r
+ if (residueHash.containsKey(res))\r
+ {\r
+ int count = ((Integer) residueHash.get(res)).intValue();\r
+ count++;\r
+ residueHash.put(res, new Integer(count));\r
+ }\r
+ else\r
+ {\r
+ residueHash.put(res, new Integer(1));\r
+ }\r
+ }\r
+ else\r
+ {\r
+ if (residueHash.containsKey("-"))\r
+ {\r
+ int count = ((Integer) residueHash.get("-")).intValue();\r
+ count++;\r
+ residueHash.put("-", new Integer(count));\r
+ }\r
+ else\r
+ {\r
+ residueHash.put("-", new Integer(1));\r
+ }\r
+ }\r
+ }\r
+ }\r
\r
- public Conservation(String name,Hashtable[] freqs,Hashtable propHash, int threshold, Vector sequences, int start, int end) {\r
- this.name = name;\r
- this.freqs = freqs;\r
- this.propHash = propHash;\r
- this.threshold = threshold;\r
- this.sequences = sequences;\r
- this.start = start;\r
- this.end = end;\r
- }\r
+ //What is the count threshold to count the residues in residueHash()\r
+ int thresh = (threshold * (sequences.size())) / 100;\r
+\r
+ //loop over all the found residues\r
+ Enumeration e = residueHash.keys();\r
+\r
+ while (e.hasMoreElements())\r
+ {\r
+ String res = (String) e.nextElement();\r
+\r
+ if (((Integer) residueHash.get(res)).intValue() > thresh)\r
+ {\r
+ //Now loop over the properties\r
+ Enumeration e2 = propHash.keys();\r
+\r
+ while (e2.hasMoreElements())\r
+ {\r
+ String type = (String) e2.nextElement();\r
+ Hashtable ht = (Hashtable) propHash.get(type);\r
+\r
+ //Have we ticked this before?\r
+ if (!resultHash.containsKey(type))\r
+ {\r
+ if (ht.containsKey(res))\r
+ {\r
+ resultHash.put(type, ht.get(res));\r
+ }\r
+ else\r
+ {\r
+ resultHash.put(type, ht.get("-"));\r
+ }\r
+ }\r
+ else if (((Integer) resultHash.get(type)).equals(\r
+ (Integer) ht.get(res)) == false)\r
+ {\r
+ resultHash.put(type, new Integer(-1));\r
+ }\r
+ }\r
+ }\r
+ }\r
\r
+ total.addElement(resultHash);\r
+ }\r
+ }\r
\r
- public void calculate() {\r
\r
- for (int i = start;i <= end; i++) {\r
- Hashtable resultHash = null;\r
- Hashtable residueHash = null;\r
+ /***\r
+ * countConsNGaps\r
+ * returns gap count in int[0], and conserved residue count in int[1]\r
+ */\r
+ public int[] countConsNGaps(int j)\r
+ {\r
+ int count = 0;\r
+ int cons = 0;\r
+ int nres = 0;\r
+ int[] r = new int[2];\r
+ char f = '$';\r
+\r
+ for (int i = 0; i < sequences.size(); i++)\r
+ {\r
+ if (j >= ((Sequence) sequences.elementAt(i)).getSequence().length())\r
+ {\r
+ count++;\r
+\r
+ continue;\r
+ }\r
\r
- resultHash = new Hashtable();\r
- residueHash = new Hashtable();\r
+ char c = ((Sequence) sequences.elementAt(i)).getSequence().charAt(j);\r
\r
- for (int j=0; j < sequences.size(); j++) {\r
+ if (jalview.util.Comparison.isGap((c)))\r
+ {\r
+ count++;\r
+ }\r
+ else\r
+ {\r
+ nres++;\r
+\r
+ if (nres == 1)\r
+ {\r
+ f = c;\r
+ cons++;\r
+ }\r
+ else if (f == c)\r
+ {\r
+ cons++;\r
+ }\r
+ }\r
+ }\r
\r
- if (sequences.elementAt(j) instanceof Sequence) {\r
- Sequence s = (Sequence)sequences.elementAt(j);\r
+ r[0] = (nres == cons) ? 1 : 0;\r
+ r[1] = count;\r
\r
- if (s.getSequence().length() > i) {\r
- String res = s.getSequence().substring(i,i+1);\r
+ return r;\r
+ }\r
\r
- if (residueHash.containsKey(res)) {\r
- int count = ((Integer)residueHash.get(res)).intValue() ;\r
- count++;\r
- residueHash.put(res,new Integer(count));\r
- } else {\r
- residueHash.put(res,new Integer(1));\r
+ /**\r
+ * Calculates the conservation sequence\r
+ *\r
+ * @param consflag if true, poitiveve conservation; false calculates negative conservation\r
+ * @param percentageGaps commonly used value is 25\r
+ */\r
+ public void verdict(boolean consflag, float percentageGaps)\r
+ {\r
+ String consString = "";\r
+\r
+ for (int i = start; i <= end; i++)\r
+ {\r
+ int[] gapcons = countConsNGaps(i);\r
+ int totGaps = gapcons[1];\r
+ float pgaps = ((float) totGaps * 100) / (float) sequences.size();\r
+\r
+ // System.out.println("percentage gaps = "+pgaps+"\n");\r
+ if (percentageGaps > pgaps)\r
+ {\r
+ Hashtable resultHash = (Hashtable) total.elementAt(i - start);\r
+\r
+ //Now find the verdict\r
+ int count = 0;\r
+ Enumeration e3 = resultHash.keys();\r
+\r
+ while (e3.hasMoreElements())\r
+ {\r
+ String type = (String) e3.nextElement();\r
+ Integer result = (Integer) resultHash.get(type);\r
+\r
+ //Do we want to count +ve conservation or +ve and -ve cons.?\r
+ if (consflag)\r
+ {\r
+ if (result.intValue() == 1)\r
+ {\r
+ count++;\r
+ }\r
+ }\r
+ else\r
+ {\r
+ if (result.intValue() != -1)\r
+ {\r
+ count++;\r
+ }\r
+ }\r
+ }\r
+\r
+ if (count < 10)\r
+ {\r
+ consString = consString + String.valueOf(count); // Conserved props!=Identity\r
+ }\r
+ else\r
+ {\r
+ consString = consString + ((gapcons[0] == 1) ? "*" : "+");\r
+ }\r
}\r
- } else {\r
- if (residueHash.containsKey("-")) {\r
- int count = ((Integer)residueHash.get("-")).intValue() ;\r
- count++;\r
- residueHash.put("-",new Integer(count));\r
- } else {\r
- residueHash.put("-",new Integer(1));\r
+ else\r
+ {\r
+ consString = consString + "-";\r
}\r
- }\r
}\r
- }\r
\r
- //What is the count threshold to count the residues in residueHash()\r
- int thresh = threshold*(sequences.size())/100;\r
+ consSequence = new Sequence(name, consString, start, end);\r
+ }\r
\r
- //loop over all the found residues\r
- Enumeration e = residueHash.keys();\r
+ /**\r
+ *\r
+ *\r
+ * @return Conservation sequence\r
+ */\r
+ public Sequence getConsSequence()\r
+ {\r
+ return consSequence;\r
+ }\r
\r
- while (e.hasMoreElements()) {\r
+ // From Alignment.java in jalview118\r
+ public void findQuality()\r
+ {\r
+ findQuality(0, maxLength - 1);\r
+ }\r
\r
- String res = (String)e.nextElement();\r
- if (((Integer)residueHash.get(res)).intValue() > thresh) {\r
+ /**\r
+ * DOCUMENT ME!\r
+ */\r
+ private void percentIdentity2()\r
+ {\r
+ calcSeqNums(); // updates maxLength, too.\r
+\r
+ if ((cons2 == null) || seqNumsChanged)\r
+ {\r
+ cons2 = new int[maxLength][24];\r
+\r
+ // Initialize the array\r
+ for (int j = 0; j < 24; j++)\r
+ {\r
+ for (int i = 0; i < maxLength; i++)\r
+ {\r
+ cons2[i][j] = 0;\r
+ }\r
+ }\r
\r
- //Now loop over the properties\r
- Enumeration e2 = propHash.keys();\r
+ int[] sqnum;\r
+ int j = 0;\r
\r
- while (e2.hasMoreElements()) {\r
- String type = (String)e2.nextElement();\r
- Hashtable ht = (Hashtable)propHash.get(type);\r
+ while (j < sequences.size())\r
+ {\r
+ sqnum = (int[]) seqNums.elementAt(j);\r
\r
- //Have we ticked this before?\r
- if (! resultHash.containsKey(type)) {\r
- if (ht.containsKey(res)) {\r
- resultHash.put(type,ht.get(res));\r
- } else {\r
- resultHash.put(type,ht.get("-"));\r
- }\r
- } else if ( ((Integer)resultHash.get(type)).equals((Integer)ht.get(res)) == false) {\r
- resultHash.put(type,new Integer(-1));\r
- }\r
- }\r
- }\r
- }\r
- total.addElement(resultHash);\r
- }\r
- }\r
+ for (int i = 1; i < sqnum.length; i++)\r
+ {\r
+ cons2[i - 1][sqnum[i]]++;\r
+ }\r
\r
- public int countGaps(int j) {\r
- int count = 0;\r
+ for (int i = sqnum.length - 1; i < maxLength; i++)\r
+ {\r
+ cons2[i][23]++; // gap count\r
+ }\r
\r
- for (int i = 0; i < sequences.size();i++) {\r
- String tmp = ((Sequence)sequences.elementAt(i)).getSequence().substring(j,j+1);\r
- if (tmp.equals(" ") || tmp.equals(".") || tmp.equals("-")) {\r
- count++;\r
- }\r
- }\r
- return count;\r
- }\r
-\r
- public void verdict(boolean consflag, float percentageGaps) {\r
- String consString = "";\r
+ j++;\r
+ }\r
\r
- for (int i=start; i <= end; i++) {\r
- int totGaps = countGaps(i);\r
- float pgaps = (float)totGaps*100/(float)sequences.size();\r
+ // unnecessary ?\r
\r
- if (percentageGaps > pgaps) {\r
- Hashtable resultHash = (Hashtable)total.elementAt(i);\r
+ /* for (int i=start; i <= end; i++) {\r
+ int max = -1000;\r
+ int maxi = -1;\r
+ int maxj = -1;\r
\r
- //Now find the verdict\r
- int count = 0;\r
- Enumeration e3 = resultHash.keys();\r
+ for (int j=0;j<24;j++) {\r
+ if (cons2[i][j] > max) {\r
+ max = cons2[i][j];\r
+ maxi = i;\r
+ maxj = j;\r
+ }\r
\r
- while (e3.hasMoreElements()) {\r
- String type = (String)e3.nextElement();\r
- Integer result = (Integer)resultHash.get(type);\r
+ }\r
+ } */\r
+ }\r
+ }\r
\r
- //Do we want to count +ve conservation or +ve and -ve cons.?\r
+ /**\r
+ * Calculates the quality of the set of sequences\r
+ *\r
+ * @param start Start residue\r
+ * @param end End residue\r
+ */\r
+ public void findQuality(int start, int end)\r
+ {\r
+ quality = new Vector();\r
+\r
+ double max = -10000;\r
+ String s = "";\r
+ int[][] BLOSUM62 = jalview.schemes.ResidueProperties.getBLOSUM62();\r
+\r
+ //Loop over columns // JBPNote Profiling info\r
+ // long ts = System.currentTimeMillis();\r
+ //long te = System.currentTimeMillis();\r
+ percentIdentity2();\r
+\r
+ int size = seqNums.size();\r
+ int[] lengths = new int[size];\r
+\r
+ for (int l = 0; l < size; l++)\r
+ lengths[l] = ((int[]) seqNums.elementAt(l)).length - 1;\r
+\r
+ for (int j = start; j <= end; j++)\r
+ {\r
+ double bigtot = 0;\r
+\r
+ // First Xr = depends on column only\r
+ double[] x = new double[24];\r
+\r
+ for (int ii = 0; ii < 24; ii++)\r
+ {\r
+ x[ii] = 0;\r
+\r
+ try\r
+ {\r
+ for (int i2 = 0; i2 < 24; i2++)\r
+ {\r
+ x[ii] += (((double) cons2[j][i2] * BLOSUM62[ii][i2]) +\r
+ 4);\r
+ }\r
+ }\r
+ catch (Exception e)\r
+ {\r
+ System.err.println("Exception during quality calculation.");\r
+ e.printStackTrace();\r
+ }\r
+\r
+ //System.out.println("X " + ii + " " + x[ii]);\r
+ x[ii] /= (size);\r
+\r
+ //System.out.println("X " + ii + " " + x[ii]);\r
+ }\r
\r
- if (consflag) {\r
- if (result.intValue() == 1) {\r
- count++;\r
+ // Now calculate D for each position and sum\r
+ for (int k = 0; k < size; k++)\r
+ {\r
+ double tot = 0;\r
+ double[] xx = new double[24];\r
+ int seqNum = (j < lengths[k])\r
+ ? ((int[]) seqNums.elementAt(k))[j + 1] : 23; // Sequence, or gap at the end\r
+\r
+ // This is a loop over r\r
+ for (int i = 0; i < 23; i++)\r
+ {\r
+ double sr = 0;\r
+\r
+ try\r
+ {\r
+ sr = (double) BLOSUM62[i][seqNum] + 4;\r
+ }\r
+ catch (Exception e)\r
+ {\r
+ System.out.println("Exception in sr: " + e);\r
+ e.printStackTrace();\r
+ }\r
+\r
+ //Calculate X with another loop over residues\r
+ // System.out.println("Xi " + i + " " + x[i] + " " + sr);\r
+ xx[i] = x[i] - sr;\r
+\r
+ tot += (xx[i] * xx[i]);\r
+ }\r
+\r
+ bigtot += Math.sqrt(tot);\r
}\r
- } else {\r
- if (result.intValue() != -1) {\r
- count++;\r
+\r
+ // This is the quality for one column\r
+ if (max < bigtot)\r
+ {\r
+ max = bigtot;\r
}\r
- }\r
- }\r
\r
- if (count < 10) {\r
- consString = consString + String.valueOf(count);\r
- } else {\r
- consString = consString + "*";\r
+ // bigtot = bigtot * (size-cons2[j][23])/size;\r
+ quality.addElement(new Double(bigtot));\r
+\r
+ s += "-";\r
+\r
+ // Need to normalize by gaps\r
}\r
- } else {\r
- consString = consString + "-";\r
- }\r
- }\r
\r
- consSequence = new DrawableSequence(name,consString,start,end);\r
+ double newmax = -10000;\r
+\r
+ for (int j = start; j <= end; j++)\r
+ {\r
+ double tmp = ((Double) quality.elementAt(j)).doubleValue();\r
+ tmp = ((max - tmp) * (size - cons2[j][23])) / size;\r
\r
- }\r
+ // System.out.println(tmp+ " " + j);\r
+ quality.setElementAt(new Double(tmp), j);\r
\r
- public jalview.gui.DrawableSequence getConsSequence() {\r
- return consSequence;\r
- }\r
+ if (tmp > newmax)\r
+ {\r
+ newmax = tmp;\r
+ }\r
+ }\r
\r
+ // System.out.println("Quality " + s);\r
+ qualityRange[0] = new Double(0);\r
+ qualityRange[1] = new Double(newmax);\r
+ }\r
}\r