X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FPCA.java;h=d51f00e41216d12ed8ffd480525c95f5242ab09d;hb=ec8f3cedf60fb1feed6d34de6b49f6bfa78b9dd8;hp=1a4060ced77e9bbf51eb17846276c7be8fe3aa88;hpb=07394c1c2d9d4ae05c85cd6d9644e4d17f2818a2;p=jalview.git diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 1a4060c..d51f00e 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,10 +20,12 @@ */ package jalview.analysis; -import jalview.math.Matrix; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.bin.Cache; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.Point; import jalview.math.MatrixI; -import jalview.schemes.ResidueProperties; -import jalview.schemes.ScoreMatrix; import java.io.PrintStream; @@ -32,118 +34,37 @@ import java.io.PrintStream; */ public class PCA implements Runnable { - boolean jvCalcMode = true; - - MatrixI symm; + /* + * inputs + */ + final private AlignmentView seqs; - double[] eigenvalue; + final private ScoreModelI scoreModel; - MatrixI eigenvector; + final private SimilarityParamsI similarityParams; - StringBuilder details = new StringBuilder(1024); + /* + * outputs + */ + private MatrixI pairwiseScores; - private String[] seqs; + private MatrixI tridiagonal; - private ScoreMatrix scoreMatrix; + private MatrixI eigenMatrix; /** - * Creates a new PCA object. By default, uses blosum62 matrix to generate - * sequence similarity matrices + * Constructor given the sequences to compute for, the similarity model to + * use, and a set of parameters for sequence comparison * - * @param s - * Set of amino acid sequences to perform PCA on + * @param sequences + * @param sm + * @param options */ - public PCA(String[] s) + public PCA(AlignmentView sequences, ScoreModelI sm, SimilarityParamsI options) { - this(s, false); - } - - /** - * Creates a new PCA object. By default, uses blosum62 matrix to generate - * sequence similarity matrices - * - * @param s - * Set of sequences to perform PCA on - * @param nucleotides - * if true, uses standard DNA/RNA matrix for sequence similarity - * calculation. - */ - public PCA(String[] s, boolean nucleotides) - { - this(s, nucleotides, null); - } - - public PCA(String[] s, boolean nucleotides, String s_m) - { - this.seqs = s; - - // BinarySequence[] bs = new BinarySequence[s.length]; - // int ii = 0; - // - // while ((ii < s.length) && (s[ii] != null)) - // { - // bs[ii] = new BinarySequence(s[ii], nucleotides); - // bs[ii].encode(); - // ii++; - // } - // - // BinarySequence[] bs2 = new BinarySequence[s.length]; - scoreMatrix = null; - String sm = s_m; - if (sm != null) - { - scoreMatrix = ResidueProperties.getScoreMatrix(sm); - } - if (scoreMatrix == null) - { - // either we were given a non-existent score matrix or a scoremodel that - // isn't based on a pairwise symbol score matrix - scoreMatrix = ResidueProperties - .getScoreMatrix(sm = (nucleotides ? "DNA" : "BLOSUM62")); - } - details.append("PCA calculation using " + sm - + " sequence similarity matrix\n========\n\n"); - // ii = 0; - // while ((ii < s.length) && (s[ii] != null)) - // { - // bs2[ii] = new BinarySequence(s[ii], nucleotides); - // if (scoreMatrix != null) - // { - // try - // { - // bs2[ii].matrixEncode(scoreMatrix); - // } catch (InvalidSequenceTypeException x) - // { - // details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n"); - // } - // } - // ii++; - // } - // - // int count = 0; - // while ((count < bs.length) && (bs[count] != null)) - // { - // count++; - // } - // - // double[][] seqmat = new double[count][]; - // double[][] seqmat2 = new double[count][]; - // - // int i = 0; - // while (i < count) - // { - // seqmat[i] = bs[i].getDBinary(); - // seqmat2[i] = bs2[i].getDBinary(); - // i++; - // } - // - // /* - // * using a SparseMatrix to hold the encoded sequences matrix - // * greatly speeds up matrix multiplication as these are mostly zero - // */ - // m = new SparseMatrix(seqmat); - // m2 = new Matrix(seqmat2); - + this.seqs = sequences; + this.scoreModel = sm; + this.similarityParams = options; } /** @@ -156,7 +77,7 @@ public class PCA implements Runnable */ public double getEigenvalue(int i) { - return eigenvector.getD()[i]; + return eigenMatrix.getD()[i]; } /** @@ -173,15 +94,16 @@ public class PCA implements Runnable * * @return DOCUMENT ME! */ - public float[][] getComponents(int l, int n, int mm, float factor) + public Point[] getComponents(int l, int n, int mm, float factor) { - float[][] out = new float[getHeight()][3]; + Point[] out = new Point[getHeight()]; for (int i = 0; i < getHeight(); i++) { - out[i][0] = (float) component(i, l) * factor; - out[i][1] = (float) component(i, n) * factor; - out[i][2] = (float) component(i, mm) * factor; + float x = (float) component(i, l) * factor; + float y = (float) component(i, n) * factor; + float z = (float) component(i, mm) * factor; + out[i] = new Point(x, y, z); } return out; @@ -222,136 +144,111 @@ public class PCA implements Runnable { double out = 0.0; - for (int i = 0; i < symm.width(); i++) + for (int i = 0; i < pairwiseScores.width(); i++) { - out += (symm.getValue(row, i) * eigenvector.getValue(i, n)); + out += (pairwiseScores.getValue(row, i) * eigenMatrix.getValue(i, n)); } - return out / eigenvector.getD()[n]; + return out / eigenMatrix.getD()[n]; } + /** + * Answers a formatted text report of the PCA calculation results (matrices + * and eigenvalues) suitable for display + * + * @return + */ public String getDetails() { - return details.toString(); + StringBuilder sb = new StringBuilder(1024); + sb.append("PCA calculation using ").append(scoreModel.getName()) + .append(" sequence similarity matrix\n========\n\n"); + PrintStream ps = wrapOutputBuffer(sb); + + /* + * pairwise similarity scores + */ + sb.append(" --- OrigT * Orig ---- \n"); + pairwiseScores.print(ps, "%8.2f"); + + /* + * tridiagonal matrix, with D and E vectors + */ + sb.append(" ---Tridiag transform matrix ---\n"); + sb.append(" --- D vector ---\n"); + tridiagonal.printD(ps, "%15.4e"); + ps.println(); + sb.append("--- E vector ---\n"); + tridiagonal.printE(ps, "%15.4e"); + ps.println(); + + /* + * eigenvalues matrix, with D vector + */ + sb.append(" --- New diagonalization matrix ---\n"); + eigenMatrix.print(ps, "%8.2f"); + sb.append(" --- Eigenvalues ---\n"); + eigenMatrix.printD(ps, "%15.4e"); + ps.println(); + + return sb.toString(); } /** - * DOCUMENT ME! + * Performs the PCA calculation */ @Override public void run() { - PrintStream ps = new PrintStream(System.out) - { - @Override - public void print(String x) - { - details.append(x); - } - - @Override - public void println() - { - details.append("\n"); - } - }; - - // long now = System.currentTimeMillis(); try { - details.append("PCA Calculation Mode is " - + (jvCalcMode ? "Jalview variant" : "Original SeqSpace") - + "\n"); - - // MatrixI mt = m.transpose(); - // eigenvector = mt.preMultiply(jvCalcMode ? m2 : m); - eigenvector = computePairwiseScores(); - - details.append(" --- OrigT * Orig ---- \n"); - eigenvector.print(ps, "%8.2f"); - - symm = eigenvector.copy(); - - eigenvector.tred(); - - details.append(" ---Tridiag transform matrix ---\n"); - details.append(" --- D vector ---\n"); - eigenvector.printD(ps, "%15.4e"); - ps.println(); - details.append("--- E vector ---\n"); - eigenvector.printE(ps, "%15.4e"); - ps.println(); - - // Now produce the diagonalization matrix - eigenvector.tqli(); + /* + * sequence pairwise similarity scores + */ + pairwiseScores = scoreModel.findSimilarities(seqs, similarityParams); + + /* + * tridiagonal matrix + */ + tridiagonal = pairwiseScores.copy(); + tridiagonal.tred(); + + /* + * the diagonalization matrix + */ + eigenMatrix = tridiagonal.copy(); + eigenMatrix.tqli(); } catch (Exception q) { + Cache.log.error("Error computing PCA: " + q.getMessage()); q.printStackTrace(); - details.append("\n*** Unexpected exception when performing PCA ***\n" - + q.getLocalizedMessage()); - details.append("*** Matrices below may not be fully diagonalised. ***\n"); } - - details.append(" --- New diagonalization matrix ---\n"); - eigenvector.print(ps, "%8.2f"); - details.append(" --- Eigenvalues ---\n"); - eigenvector.printD(ps, "%15.4e"); - ps.println(); - /* - * for (int seq=0;seq - *
  • Sequences:
  • - *
  • FKL
  • - *
  • RSD
  • - *
  • QIA
  • - *
  • GWC
  • - *
  • Score matrix is BLOSUM62
  • - *
  • product [0, 0] = F.F + K.K + L.L = 6 + 5 + 4 = 15
  • - *
  • product [2, 1] = R.R + S.S + D.D = 5 + 4 + 6 = 15
  • - *
  • product [2, 2] = Q.Q + I.I + A.A = 5 + 4 + 4 = 13
  • - *
  • product [3, 3] = G.G + W.W + C.C = 6 + 11 + 9 = 26
  • - *
  • product[0, 1] = F.R + K.S + L.D = -3 + 0 + -3 = -7 - *
  • and so on
  • - * + * Returns a PrintStream that wraps (appends its output to) the given + * StringBuilder + * + * @param sb + * @return */ - MatrixI computePairwiseScores() + protected PrintStream wrapOutputBuffer(StringBuilder sb) { - double[][] values = new double[seqs.length][]; - for (int row = 0; row < seqs.length; row++) + PrintStream ps = new PrintStream(System.out) { - values[row] = new double[seqs.length]; - for (int col = 0; col < seqs.length; col++) + @Override + public void print(String x) { - int total = 0; - int width = Math.min(seqs[row].length(), seqs[col].length()); - for (int i = 0; i < width; i++) - { - char c1 = seqs[row].charAt(i); - char c2 = seqs[col].charAt(i); - int score = scoreMatrix.getPairwiseScore(c1, c2); - total += score; - } - values[row][col] = total; + sb.append(x); } - } - return new Matrix(values); - } - public void setJvCalcMode(boolean calcMode) - { - this.jvCalcMode = calcMode; + @Override + public void println() + { + sb.append("\n"); + } + }; + return ps; } /** @@ -363,6 +260,42 @@ public class PCA implements Runnable public int getHeight() { // TODO can any of seqs[] be null? - return seqs.length; + return pairwiseScores.height();// seqs.getSequences().length; + } + + /** + * Answers the sequence pairwise similarity scores which were the first step + * of the PCA calculation + * + * @return + */ + public MatrixI getPairwiseScores() + { + return pairwiseScores; + } + + public void setPairwiseScores(MatrixI m) + { + pairwiseScores = m; + } + + public MatrixI getEigenmatrix() + { + return eigenMatrix; + } + + public void setEigenmatrix(MatrixI m) + { + eigenMatrix = m; + } + + public MatrixI getTridiagonal() + { + return tridiagonal; + } + + public void setTridiagonal(MatrixI tridiagonal) + { + this.tridiagonal = tridiagonal; } }