X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FPCA.java;h=4a3cfec1e48fcb11838fede41fd91c254c42e7d5;hb=cb8e52fbbc5f725e3f7f48c672cdddb0690bd978;hp=394869531a61d478b6b4f2d793991949e376e0ac;hpb=99c58ee0ae2a848f982552e53feaf6d5cb9925e5;p=jalview.git diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 3948695..4a3cfec 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -1,208 +1,301 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ - -package jalview.analysis; - -import jalview.math.*; -import jalview.datamodel.*; -import jalview.util.*; - -import java.awt.*; -import java.io.*; - -public class PCA implements Runnable { - Matrix m; - Matrix symm; - Matrix m2; - - double[] eigenvalue; - Matrix eigenvector; - - public PCA(Matrix m) { - this.m = m; - } - - public PCA(SequenceI[] s) { - Runtime rt = Runtime.getRuntime(); - - BinarySequence[] bs = new BinarySequence[s.length]; - int ii = 0; - while (ii < s.length && s[ii] != null) { - - bs[ii] = new BinarySequence(s[ii]); - bs[ii].encode(); - ii++; - } - - BinarySequence[] bs2 = new BinarySequence[s.length]; - ii = 0; - while (ii < s.length && s[ii] != null) { - - bs2[ii] = new BinarySequence(s[ii]); - bs2[ii].blosumEncode(); - ii++; - } - - - //System.out.println("Created binary encoding"); - //printMemory(rt); - - int count=0; - while (count < bs.length && bs[count] != null) { - count++; - } - double[][] seqmat = new double[count][bs[0].getDBinary().length]; - double[][] seqmat2 = new double[count][bs2[0].getDBinary().length]; - int i=0; - while (i < count) { - seqmat[i] = bs[i].getDBinary(); - seqmat2[i] = bs2[i].getDBinary(); - i++; - } - //System.out.println("Created array"); - //printMemory(rt); - // System.out.println(" --- Original matrix ---- "); - m = new Matrix(seqmat,count,bs[0].getDBinary().length); - m2 = new Matrix(seqmat2,count,bs2[0].getDBinary().length); - - //System.out.println("Created matrix"); - printMemory(rt); - } - - public static void printMemory(Runtime rt) { - System.out.println("PCA:Free memory = " + rt.freeMemory()); - } - - public Matrix getM() { - return m; - } - - public double[] getEigenvector(int i) { - return eigenvector.getColumn(i); - } - - public double getEigenvalue(int i) { - return eigenvector.d[i]; - } - public float[][] getComponents(int l, int n, int mm) { - return getComponents(l,n,mm,1); - } - public float[][] getComponents(int l, int n, int mm, float factor) { - float[][] out = new float[m.rows][3]; - - for (int i = 0; i < m.rows;i++) { - out[i][0] = (float)component(i,l)*factor; - out[i][1] = (float)component(i,n)*factor; - out[i][2] = (float)component(i,mm)*factor; - } - return out; - } - - public double[] component(int n) { - // n = index of eigenvector - double[] out = new double[m.rows]; - - for (int i=0; i < m.rows; i++) { - out[i] = component(i,n); - } - return out; - } - public double component(int row, int n) { - double out = 0.0; - - for (int i = 0; i < symm.cols; i++) { - out += symm.value[row][i] * eigenvector.value[i][n]; - } - return out/eigenvector.d[n]; - } - - public void checkEigenvector(int n,PrintStream ps) { - ps.println(" --- Eigenvector " + n + " --- "); - - double[] eigenv = eigenvector.getColumn(n); - - for (int i=0; i < eigenv.length;i++) { - Format.print(ps,"%15.4f",eigenv[i]); - } - - System.out.println(); - - double[] neigenv = symm.vectorPostMultiply(eigenv); - System.out.println(" --- symmat * eigenv / lambda --- "); - if (eigenvector.d[n] > 1e-4) { - for (int i=0; i < neigenv.length;i++) { - Format.print(System.out,"%15.4f",neigenv[i]/eigenvector.d[n]); - } - } - System.out.println(); - } - - public void run() { - Matrix mt = m.transpose(); - // System.out.println(" --- OrigT * Orig ---- "); - eigenvector = mt.preMultiply(m2); - // eigenvector.print(System.out); - symm = eigenvector.copy(); - - //TextArea ta = new TextArea(25,72); - //TextAreaPrintStream taps = new TextAreaPrintStream(System.out,ta); - //Frame f = new Frame("PCA output"); - //f.resize(500,500); - //f.setLayout(new BorderLayout()); - //f.add("Center",ta); - //f.show(); - //symm.print(taps); - long tstart = System.currentTimeMillis(); - eigenvector.tred(); - long tend = System.currentTimeMillis(); - //taps.println("Time take for tred = " + (tend-tstart) + "ms"); - //taps.println(" ---Tridiag transform matrix ---"); - - //taps.println(" --- D vector ---"); - //eigenvector.printD(taps); - //taps.println(); - //taps.println(" --- E vector ---"); - // eigenvector.printE(taps); - //taps.println(); - - // Now produce the diagonalization matrix - tstart = System.currentTimeMillis(); - eigenvector.tqli(); - tend = System.currentTimeMillis(); - //System.out.println("Time take for tqli = " + (tend-tstart) + " ms"); - - //System.out.println(" --- New diagonalization matrix ---"); - - //System.out.println(" --- Eigenvalues ---"); - //eigenvector.printD(taps); - - //System.out.println(); - - // for (int i=0; i < eigenvector.cols; i++) { - // checkEigenvector(i,taps); - // taps.println(); - // } - - // taps.println(); - // taps.println("Transformed sequences = "); - // Matrix trans = m.preMultiply(eigenvector); - // trans.print(System.out); - } - -} +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.analysis; + +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.bin.Console; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.Point; +import jalview.math.MatrixI; + +import java.io.PrintStream; + +/** + * Performs Principal Component Analysis on given sequences + */ +public class PCA implements Runnable +{ + /* + * inputs + */ + final private AlignmentView seqs; + + final private ScoreModelI scoreModel; + + final private SimilarityParamsI similarityParams; + + /* + * outputs + */ + private MatrixI pairwiseScores; + + private MatrixI tridiagonal; + + private MatrixI eigenMatrix; + + /** + * Constructor given the sequences to compute for, the similarity model to + * use, and a set of parameters for sequence comparison + * + * @param sequences + * @param sm + * @param options + */ + public PCA(AlignmentView sequences, ScoreModelI sm, SimilarityParamsI options) + { + this.seqs = sequences; + this.scoreModel = sm; + this.similarityParams = options; + } + + /** + * Returns Eigenvalue + * + * @param i + * Index of diagonal within matrix + * + * @return Returns value of diagonal from matrix + */ + public double getEigenvalue(int i) + { + return eigenMatrix.getD()[i]; + } + + /** + * DOCUMENT ME! + * + * @param l + * DOCUMENT ME! + * @param n + * DOCUMENT ME! + * @param mm + * DOCUMENT ME! + * @param factor + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public Point[] getComponents(int l, int n, int mm, float factor) + { + Point[] out = new Point[getHeight()]; + + for (int i = 0; i < getHeight(); i++) + { + float x = (float) component(i, l) * factor; + float y = (float) component(i, n) * factor; + float z = (float) component(i, mm) * factor; + out[i] = new Point(x, y, z); + } + + return out; + } + + /** + * DOCUMENT ME! + * + * @param n + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double[] component(int n) + { + // n = index of eigenvector + double[] out = new double[getHeight()]; + + for (int i = 0; i < out.length; i++) + { + out[i] = component(i, n); + } + + return out; + } + + /** + * DOCUMENT ME! + * + * @param row + * DOCUMENT ME! + * @param n + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + double component(int row, int n) + { + double out = 0.0; + + for (int i = 0; i < pairwiseScores.width(); i++) + { + out += (pairwiseScores.getValue(row, i) * eigenMatrix.getValue(i, n)); + } + + return out / eigenMatrix.getD()[n]; + } + + /** + * Answers a formatted text report of the PCA calculation results (matrices + * and eigenvalues) suitable for display + * + * @return + */ + public String getDetails() + { + StringBuilder sb = new StringBuilder(1024); + sb.append("PCA calculation using ").append(scoreModel.getName()) + .append(" sequence similarity matrix\n========\n\n"); + PrintStream ps = wrapOutputBuffer(sb); + + /* + * pairwise similarity scores + */ + sb.append(" --- OrigT * Orig ---- \n"); + pairwiseScores.print(ps, "%8.2f"); + + /* + * tridiagonal matrix, with D and E vectors + */ + sb.append(" ---Tridiag transform matrix ---\n"); + sb.append(" --- D vector ---\n"); + tridiagonal.printD(ps, "%15.4e"); + ps.println(); + sb.append("--- E vector ---\n"); + tridiagonal.printE(ps, "%15.4e"); + ps.println(); + + /* + * eigenvalues matrix, with D vector + */ + sb.append(" --- New diagonalization matrix ---\n"); + eigenMatrix.print(ps, "%8.2f"); + sb.append(" --- Eigenvalues ---\n"); + eigenMatrix.printD(ps, "%15.4e"); + ps.println(); + + return sb.toString(); + } + + /** + * Performs the PCA calculation + */ + @Override + public void run() + { + try + { + /* + * sequence pairwise similarity scores + */ + pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); + + /* + * tridiagonal matrix + */ + tridiagonal = pairwiseScores.copy(); + tridiagonal.tred(); + + /* + * the diagonalization matrix + */ + eigenMatrix = tridiagonal.copy(); + eigenMatrix.tqli(); + } catch (Exception q) + { + Console.error("Error computing PCA: " + q.getMessage()); + q.printStackTrace(); + } + } + + /** + * Returns a PrintStream that wraps (appends its output to) the given + * StringBuilder + * + * @param sb + * @return + */ + protected PrintStream wrapOutputBuffer(StringBuilder sb) + { + PrintStream ps = new PrintStream(System.out) + { + @Override + public void print(String x) + { + sb.append(x); + } + + @Override + public void println() + { + sb.append("\n"); + } + }; + return ps; + } + + /** + * Answers the N dimensions of the NxN PCA matrix. This is the number of + * sequences involved in the pairwise score calculation. + * + * @return + */ + public int getHeight() + { + // TODO can any of seqs[] be null? + return pairwiseScores.height();// seqs.getSequences().length; + } + + /** + * Answers the sequence pairwise similarity scores which were the first step + * of the PCA calculation + * + * @return + */ + public MatrixI getPairwiseScores() + { + return pairwiseScores; + } + + public void setPairwiseScores(MatrixI m) + { + pairwiseScores = m; + } + + public MatrixI getEigenmatrix() + { + return eigenMatrix; + } + + public void setEigenmatrix(MatrixI m) + { + eigenMatrix = m; + } + + public MatrixI getTridiagonal() + { + return tridiagonal; + } + + public void setTridiagonal(MatrixI tridiagonal) + { + this.tridiagonal = tridiagonal; + } +}