From: gmungoc Date: Tue, 17 Jan 2017 13:21:12 +0000 (+0000) Subject: JAL-2379 use SparseMatrix for encoded sequences, tidy unneeded code X-Git-Tag: Release_2_10_3b1~343^2~13 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=29433ea97308601bc17d76d376c6e629b0b28fa5;p=jalview.git JAL-2379 use SparseMatrix for encoded sequences, tidy unneeded code --- diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 06a139b..9fc6027 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -23,6 +23,8 @@ package jalview.analysis; import jalview.datamodel.BinarySequence; import jalview.datamodel.BinarySequence.InvalidSequenceTypeException; import jalview.math.Matrix; +import jalview.math.MatrixI; +import jalview.math.SparseMatrix; import jalview.schemes.ResidueProperties; import jalview.schemes.ScoreMatrix; @@ -30,23 +32,22 @@ import java.io.PrintStream; /** * Performs Principal Component Analysis on given sequences - * - * @author $author$ - * @version $Revision$ */ public class PCA implements Runnable { - Matrix m; + boolean jvCalcMode = true; + + MatrixI m; - Matrix symm; + MatrixI symm; - Matrix m2; + MatrixI m2; double[] eigenvalue; - Matrix eigenvector; + MatrixI eigenvector; - StringBuffer details = new StringBuffer(); + StringBuilder details = new StringBuilder(1024); /** * Creates a new PCA object. By default, uses blosum62 matrix to generate @@ -89,7 +90,6 @@ public class PCA implements Runnable } BinarySequence[] bs2 = new BinarySequence[s.length]; - ii = 0; ScoreMatrix smtrx = null; String sm = s_m; if (sm != null) @@ -105,6 +105,7 @@ public class PCA implements Runnable } details.append("PCA calculation using " + sm + " sequence similarity matrix\n========\n\n"); + ii = 0; while ((ii < s.length) && (s[ii] != null)) { bs2[ii] = new BinarySequence(s[ii], nucleotides); @@ -121,19 +122,16 @@ public class PCA implements Runnable ii++; } - // System.out.println("Created binary encoding"); - // printMemory(rt); int count = 0; - while ((count < bs.length) && (bs[count] != null)) { count++; } - double[][] seqmat = new double[count][bs[0].getDBinary().length]; - double[][] seqmat2 = new double[count][bs2[0].getDBinary().length]; - int i = 0; + double[][] seqmat = new double[count][]; + double[][] seqmat2 = new double[count][]; + int i = 0; while (i < count) { seqmat[i] = bs[i].getDBinary(); @@ -141,11 +139,12 @@ public class PCA implements Runnable i++; } - // System.out.println("Created array"); - // printMemory(rt); - // System.out.println(" --- Original matrix ---- "); - m = new Matrix(seqmat, count, bs[0].getDBinary().length); - m2 = new Matrix(seqmat2, count, bs2[0].getDBinary().length); + /* + * using a SparseMatrix to hold the encoded sequences matrix + * greatly speeds up matrix multiplication as these are mostly zero + */ + m = new SparseMatrix(seqmat); + m2 = new Matrix(seqmat2); } @@ -155,7 +154,7 @@ public class PCA implements Runnable * @return java.math.Matrix object */ - public Matrix getM() + public MatrixI getM() { return m; } @@ -170,7 +169,7 @@ public class PCA implements Runnable */ public double getEigenvalue(int i) { - return eigenvector.d[i]; + return eigenvector.getD()[i]; } /** @@ -189,9 +188,9 @@ public class PCA implements Runnable */ public float[][] getComponents(int l, int n, int mm, float factor) { - float[][] out = new float[m.rows][3]; + float[][] out = new float[m.height()][3]; - for (int i = 0; i < m.rows; i++) + for (int i = 0; i < m.height(); i++) { out[i][0] = (float) component(i, l) * factor; out[i][1] = (float) component(i, n) * factor; @@ -212,9 +211,9 @@ public class PCA implements Runnable public double[] component(int n) { // n = index of eigenvector - double[] out = new double[m.rows]; + double[] out = new double[m.height()]; - for (int i = 0; i < m.rows; i++) + for (int i = 0; i < m.height(); i++) { out[i] = component(i, n); } @@ -236,12 +235,12 @@ public class PCA implements Runnable { double out = 0.0; - for (int i = 0; i < symm.cols; i++) + for (int i = 0; i < symm.width(); i++) { - out += (symm.value[row][i] * eigenvector.value[i][n]); + out += (symm.getValue(row, i) * eigenvector.getValue(i, n)); } - return out / eigenvector.d[n]; + return out / eigenvector.getD()[n]; } public String getDetails() @@ -252,40 +251,37 @@ public class PCA implements Runnable /** * DOCUMENT ME! */ + @Override public void run() { PrintStream ps = new PrintStream(System.out) { + @Override public void print(String x) { details.append(x); } + @Override public void println() { details.append("\n"); } }; + // long now = System.currentTimeMillis(); try { details.append("PCA Calculation Mode is " + (jvCalcMode ? "Jalview variant" : "Original SeqSpace") + "\n"); - Matrix mt = m.transpose(); + MatrixI mt = m.transpose(); details.append(" --- OrigT * Orig ---- \n"); - if (!jvCalcMode) - { - eigenvector = mt.preMultiply(m); // standard seqspace comparison matrix - } - else - { - eigenvector = mt.preMultiply(m2); // jalview variation on seqsmace - // method - } - eigenvector.print(ps); + eigenvector = mt.preMultiply(jvCalcMode ? m2 : m); + + eigenvector.print(ps, "%8.2f"); symm = eigenvector.copy(); @@ -293,10 +289,10 @@ public class PCA implements Runnable details.append(" ---Tridiag transform matrix ---\n"); details.append(" --- D vector ---\n"); - eigenvector.printD(ps); + eigenvector.printD(ps, "%15.4e"); ps.println(); details.append("--- E vector ---\n"); - eigenvector.printE(ps); + eigenvector.printE(ps, "%15.4e"); ps.println(); // Now produce the diagonalization matrix @@ -310,9 +306,9 @@ public class PCA implements Runnable } details.append(" --- New diagonalization matrix ---\n"); - eigenvector.print(ps); + eigenvector.print(ps, "%8.2f"); details.append(" --- Eigenvalues ---\n"); - eigenvector.printD(ps); + eigenvector.printD(ps, "%15.4e"); ps.println(); /* * for (int seq=0;seq 1e-4) + // { + // comps[i] = pca.component(i); + // } + // } - for (int i = 0; i < ii; i++) - { - if (pca.getEigenvalue(i) > 1e-4) - { - comps[i] = pca.component(i); - } - } - - top = pca.getM().rows - 1; + top = pca.getM().height() - 1; points = new Vector(); float[][] scores = pca.getComponents(top - 1, top - 2, top - 3, 100); - for (int i = 0; i < pca.getM().rows; i++) + for (int i = 0; i < pca.getM().height(); i++) { SequencePoint sp = new SequencePoint(seqs[i], scores[i]); points.addElement(sp); } - } public void updateRc(RotatableCanvasI rc) { - rc.setPoints(points, pca.getM().rows); + rc.setPoints(points, pca.getM().height()); } public boolean isNucleotide() @@ -146,9 +145,9 @@ public class PCAModel // note: actual indices for components are dim1-1, etc (patch for JAL-1123) float[][] scores = pca.getComponents(dim1 - 1, dim2 - 1, dim3 - 1, 100); - for (int i = 0; i < pca.getM().rows; i++) + for (int i = 0; i < pca.getM().height(); i++) { - ((SequencePoint) points.elementAt(i)).coord = scores[i]; + points.elementAt(i).coord = scores[i]; } }