2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.datamodel.BinarySequence;
24 import jalview.datamodel.BinarySequence.InvalidSequenceTypeException;
25 import jalview.math.Matrix;
26 import jalview.math.MatrixI;
27 import jalview.math.SparseMatrix;
28 import jalview.schemes.ResidueProperties;
29 import jalview.schemes.ScoreMatrix;
31 import java.io.PrintStream;
34 * Performs Principal Component Analysis on given sequences
36 public class PCA implements Runnable
38 boolean jvCalcMode = true;
50 StringBuilder details = new StringBuilder(1024);
53 * Creates a new PCA object. By default, uses blosum62 matrix to generate
54 * sequence similarity matrices
57 * Set of amino acid sequences to perform PCA on
59 public PCA(String[] s)
65 * Creates a new PCA object. By default, uses blosum62 matrix to generate
66 * sequence similarity matrices
69 * Set of sequences to perform PCA on
71 * if true, uses standard DNA/RNA matrix for sequence similarity
74 public PCA(String[] s, boolean nucleotides)
76 this(s, nucleotides, null);
79 public PCA(String[] s, boolean nucleotides, String s_m)
82 BinarySequence[] bs = new BinarySequence[s.length];
85 while ((ii < s.length) && (s[ii] != null))
87 bs[ii] = new BinarySequence(s[ii], nucleotides);
92 BinarySequence[] bs2 = new BinarySequence[s.length];
93 ScoreMatrix smtrx = null;
97 smtrx = ResidueProperties.getScoreMatrix(sm);
101 // either we were given a non-existent score matrix or a scoremodel that
102 // isn't based on a pairwise symbol score matrix
103 smtrx = ResidueProperties.getScoreMatrix(sm = (nucleotides ? "DNA"
106 details.append("PCA calculation using " + sm
107 + " sequence similarity matrix\n========\n\n");
109 while ((ii < s.length) && (s[ii] != null))
111 bs2[ii] = new BinarySequence(s[ii], nucleotides);
116 bs2[ii].matrixEncode(smtrx);
117 } catch (InvalidSequenceTypeException x)
119 details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n");
126 while ((count < bs.length) && (bs[count] != null))
131 double[][] seqmat = new double[count][];
132 double[][] seqmat2 = new double[count][];
137 seqmat[i] = bs[i].getDBinary();
138 seqmat2[i] = bs2[i].getDBinary();
143 * using a SparseMatrix to hold the encoded sequences matrix
144 * greatly speeds up matrix multiplication as these are mostly zero
146 m = new SparseMatrix(seqmat);
147 m2 = new Matrix(seqmat2);
152 * Returns the matrix used in PCA calculation
157 public MatrixI getM()
166 * Index of diagonal within matrix
168 * @return Returns value of diagonal from matrix
170 public double getEigenvalue(int i)
172 return eigenvector.getD()[i];
187 * @return DOCUMENT ME!
189 public float[][] getComponents(int l, int n, int mm, float factor)
191 float[][] out = new float[m.height()][3];
193 for (int i = 0; i < m.height(); i++)
195 out[i][0] = (float) component(i, l) * factor;
196 out[i][1] = (float) component(i, n) * factor;
197 out[i][2] = (float) component(i, mm) * factor;
209 * @return DOCUMENT ME!
211 public double[] component(int n)
213 // n = index of eigenvector
214 double[] out = new double[m.height()];
216 for (int i = 0; i < m.height(); i++)
218 out[i] = component(i, n);
232 * @return DOCUMENT ME!
234 double component(int row, int n)
238 for (int i = 0; i < symm.width(); i++)
240 out += (symm.getValue(row, i) * eigenvector.getValue(i, n));
243 return out / eigenvector.getD()[n];
246 public String getDetails()
248 return details.toString();
257 PrintStream ps = new PrintStream(System.out)
260 public void print(String x)
266 public void println()
268 details.append("\n");
272 // long now = System.currentTimeMillis();
275 details.append("PCA Calculation Mode is "
276 + (jvCalcMode ? "Jalview variant" : "Original SeqSpace")
278 MatrixI mt = m.transpose();
280 details.append(" --- OrigT * Orig ---- \n");
282 eigenvector = mt.preMultiply(jvCalcMode ? m2 : m);
284 eigenvector.print(ps, "%8.2f");
286 symm = eigenvector.copy();
290 details.append(" ---Tridiag transform matrix ---\n");
291 details.append(" --- D vector ---\n");
292 eigenvector.printD(ps, "%15.4e");
294 details.append("--- E vector ---\n");
295 eigenvector.printE(ps, "%15.4e");
298 // Now produce the diagonalization matrix
300 } catch (Exception q)
303 details.append("\n*** Unexpected exception when performing PCA ***\n"
304 + q.getLocalizedMessage());
305 details.append("*** Matrices below may not be fully diagonalised. ***\n");
308 details.append(" --- New diagonalization matrix ---\n");
309 eigenvector.print(ps, "%8.2f");
310 details.append(" --- Eigenvalues ---\n");
311 eigenvector.printD(ps, "%15.4e");
314 * for (int seq=0;seq<symm.rows;seq++) { ps.print("\"Seq"+seq+"\""); for
315 * (int ev=0;ev<symm.rows; ev++) {
317 * ps.print(","+component(seq, ev)); } ps.println(); }
319 // System.out.println(("PCA.run() took "
320 // + (System.currentTimeMillis() - now) + "ms"));
323 public void setJvCalcMode(boolean calcMode)
325 this.jvCalcMode = calcMode;