X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FPCA.java;h=7d6b8224523986eb33ae86c2c55a2aa139cd48d2;hb=0ce9577d119e1cc646f4b0d2e961f698d994fcc5;hp=6498b6d1dbecd52e08a6efef77c54f775acc0709;hpb=6d94bfcf539e558b09a08102b228e65e670f77d8;p=jalview.git diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 6498b6d..7d6b822 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -1,53 +1,58 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.analysis; -import java.io.*; - -import jalview.datamodel.*; +import jalview.datamodel.BinarySequence; import jalview.datamodel.BinarySequence.InvalidSequenceTypeException; -import jalview.math.*; +import jalview.math.Matrix; +import jalview.math.MatrixI; +import jalview.math.SparseMatrix; import jalview.schemes.ResidueProperties; import jalview.schemes.ScoreMatrix; +import java.io.PrintStream; + /** * Performs Principal Component Analysis on given sequences - * - * @author $author$ - * @version $Revision$ */ public class PCA implements Runnable { - Matrix m; + boolean jvCalcMode = true; + + MatrixI m; - Matrix symm; + MatrixI symm; - Matrix m2; + MatrixI m2; double[] eigenvalue; - Matrix eigenvector; + MatrixI eigenvector; - StringBuffer details = new StringBuffer(); + StringBuilder details = new StringBuilder(1024); /** - * Creates a new PCA object. - * By default, uses blosum62 matrix to generate sequence similarity matrices + * Creates a new PCA object. By default, uses blosum62 matrix to generate + * sequence similarity matrices + * * @param s * Set of amino acid sequences to perform PCA on */ @@ -55,34 +60,52 @@ public class PCA implements Runnable { this(s, false); } - + /** - * Creates a new PCA object. - * By default, uses blosum62 matrix to generate sequence similarity matrices + * Creates a new PCA object. By default, uses blosum62 matrix to generate + * sequence similarity matrices + * * @param s * Set of sequences to perform PCA on - * @param nucleotides if true, uses standard DNA/RNA matrix for sequence similarity calculation. + * @param nucleotides + * if true, uses standard DNA/RNA matrix for sequence similarity + * calculation. */ public PCA(String[] s, boolean nucleotides) { + this(s, nucleotides, null); + } + + public PCA(String[] s, boolean nucleotides, String s_m) + { BinarySequence[] bs = new BinarySequence[s.length]; int ii = 0; while ((ii < s.length) && (s[ii] != null)) { - bs[ii] = new BinarySequence(s[ii],nucleotides); + bs[ii] = new BinarySequence(s[ii], nucleotides); bs[ii].encode(); ii++; } BinarySequence[] bs2 = new BinarySequence[s.length]; + ScoreMatrix smtrx = null; + String sm = s_m; + if (sm != null) + { + smtrx = ResidueProperties.getScoreMatrix(sm); + } + if (smtrx == null) + { + // either we were given a non-existent score matrix or a scoremodel that + // isn't based on a pairwise symbol score matrix + smtrx = ResidueProperties.getScoreMatrix(sm = (nucleotides ? "DNA" + : "BLOSUM62")); + } + details.append("PCA calculation using " + sm + + " sequence similarity matrix\n========\n\n"); ii = 0; - - String sm=nucleotides ? "DNA" : "BLOSUM62"; - ScoreMatrix smtrx=ResidueProperties.getScoreMatrix(sm); - details.append("PCA calculation using "+sm+" sequence similarity matrix\n========\n\n"); - while ((ii < s.length) && (s[ii] != null)) { bs2[ii] = new BinarySequence(s[ii], nucleotides); @@ -99,19 +122,16 @@ public class PCA implements Runnable ii++; } - // System.out.println("Created binary encoding"); - // printMemory(rt); int count = 0; - while ((count < bs.length) && (bs[count] != null)) { count++; } - double[][] seqmat = new double[count][bs[0].getDBinary().length]; - double[][] seqmat2 = new double[count][bs2[0].getDBinary().length]; - int i = 0; + double[][] seqmat = new double[count][]; + double[][] seqmat2 = new double[count][]; + int i = 0; while (i < count) { seqmat[i] = bs[i].getDBinary(); @@ -119,21 +139,22 @@ public class PCA implements Runnable i++; } - // System.out.println("Created array"); - // printMemory(rt); - // System.out.println(" --- Original matrix ---- "); - m = new Matrix(seqmat, count, bs[0].getDBinary().length); - m2 = new Matrix(seqmat2, count, bs2[0].getDBinary().length); + /* + * using a SparseMatrix to hold the encoded sequences matrix + * greatly speeds up matrix multiplication as these are mostly zero + */ + m = new SparseMatrix(seqmat); + m2 = new Matrix(seqmat2); } /** * Returns the matrix used in PCA calculation * - * @return java.math.Matrix object + * @return */ - public Matrix getM() + public MatrixI getM() { return m; } @@ -148,7 +169,7 @@ public class PCA implements Runnable */ public double getEigenvalue(int i) { - return eigenvector.d[i]; + return eigenvector.getD()[i]; } /** @@ -167,9 +188,9 @@ public class PCA implements Runnable */ public float[][] getComponents(int l, int n, int mm, float factor) { - float[][] out = new float[m.rows][3]; + float[][] out = new float[m.height()][3]; - for (int i = 0; i < m.rows; i++) + for (int i = 0; i < m.height(); i++) { out[i][0] = (float) component(i, l) * factor; out[i][1] = (float) component(i, n) * factor; @@ -190,9 +211,9 @@ public class PCA implements Runnable public double[] component(int n) { // n = index of eigenvector - double[] out = new double[m.rows]; + double[] out = new double[m.height()]; - for (int i = 0; i < m.rows; i++) + for (int i = 0; i < m.height(); i++) { out[i] = component(i, n); } @@ -214,12 +235,12 @@ public class PCA implements Runnable { double out = 0.0; - for (int i = 0; i < symm.cols; i++) + for (int i = 0; i < symm.width(); i++) { - out += (symm.value[row][i] * eigenvector.value[i][n]); + out += (symm.getValue(row, i) * eigenvector.getValue(i, n)); } - return out / eigenvector.d[n]; + return out / eigenvector.getD()[n]; } public String getDetails() @@ -230,48 +251,64 @@ public class PCA implements Runnable /** * DOCUMENT ME! */ + @Override public void run() { - Matrix mt = m.transpose(); - - details.append(" --- OrigT * Orig ---- \n"); - // eigenvector = mt.preMultiply(m); // standard seqspace comparison matrix - eigenvector = mt.preMultiply(m2); // jalview variation on seqsmace method - PrintStream ps = new PrintStream(System.out) { + @Override public void print(String x) { details.append(x); } + @Override public void println() { details.append("\n"); } }; - eigenvector.print(ps); + // long now = System.currentTimeMillis(); + try + { + details.append("PCA Calculation Mode is " + + (jvCalcMode ? "Jalview variant" : "Original SeqSpace") + + "\n"); + MatrixI mt = m.transpose(); - symm = eigenvector.copy(); + details.append(" --- OrigT * Orig ---- \n"); - eigenvector.tred(); + eigenvector = mt.preMultiply(jvCalcMode ? m2 : m); - details.append(" ---Tridiag transform matrix ---\n"); - details.append(" --- D vector ---\n"); - eigenvector.printD(ps); - ps.println(); - details.append("--- E vector ---\n"); - eigenvector.printE(ps); - ps.println(); + eigenvector.print(ps, "%8.2f"); - // Now produce the diagonalization matrix - eigenvector.tqli(); + symm = eigenvector.copy(); + + eigenvector.tred(); + + details.append(" ---Tridiag transform matrix ---\n"); + details.append(" --- D vector ---\n"); + eigenvector.printD(ps, "%15.4e"); + ps.println(); + details.append("--- E vector ---\n"); + eigenvector.printE(ps, "%15.4e"); + ps.println(); + + // Now produce the diagonalization matrix + eigenvector.tqli(); + } catch (Exception q) + { + q.printStackTrace(); + details.append("\n*** Unexpected exception when performing PCA ***\n" + + q.getLocalizedMessage()); + details.append("*** Matrices below may not be fully diagonalised. ***\n"); + } details.append(" --- New diagonalization matrix ---\n"); - eigenvector.print(ps); + eigenvector.print(ps, "%8.2f"); details.append(" --- Eigenvalues ---\n"); - eigenvector.printD(ps); + eigenvector.printD(ps, "%15.4e"); ps.println(); /* * for (int seq=0;seq