X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FPCA.java;h=9babaee1ea6928a1731b3906c1a4cbada0fa5bee;hb=34d5dfb25731d7d797177cadfce954258ae90991;hp=9fc6027fe2644ffd86d0335695a31b26caa79f47;hpb=29433ea97308601bc17d76d376c6e629b0b28fa5;p=jalview.git diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 9fc6027..9babaee 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,11 +20,7 @@ */ package jalview.analysis; -import jalview.datamodel.BinarySequence; -import jalview.datamodel.BinarySequence.InvalidSequenceTypeException; -import jalview.math.Matrix; import jalview.math.MatrixI; -import jalview.math.SparseMatrix; import jalview.schemes.ResidueProperties; import jalview.schemes.ScoreMatrix; @@ -37,18 +33,18 @@ public class PCA implements Runnable { boolean jvCalcMode = true; - MatrixI m; - MatrixI symm; - MatrixI m2; - double[] eigenvalue; MatrixI eigenvector; StringBuilder details = new StringBuilder(1024); + private String[] seqs; + + private ScoreMatrix scoreMatrix; + /** * Creates a new PCA object. By default, uses blosum62 matrix to generate * sequence similarity matrices @@ -78,85 +74,23 @@ public class PCA implements Runnable public PCA(String[] s, boolean nucleotides, String s_m) { + this.seqs = s; - BinarySequence[] bs = new BinarySequence[s.length]; - int ii = 0; - - while ((ii < s.length) && (s[ii] != null)) - { - bs[ii] = new BinarySequence(s[ii], nucleotides); - bs[ii].encode(); - ii++; - } - - BinarySequence[] bs2 = new BinarySequence[s.length]; - ScoreMatrix smtrx = null; + scoreMatrix = null; String sm = s_m; if (sm != null) { - smtrx = ResidueProperties.getScoreMatrix(sm); + scoreMatrix = ResidueProperties.getScoreMatrix(sm); } - if (smtrx == null) + if (scoreMatrix == null) { // either we were given a non-existent score matrix or a scoremodel that // isn't based on a pairwise symbol score matrix - smtrx = ResidueProperties.getScoreMatrix(sm = (nucleotides ? "DNA" - : "BLOSUM62")); + scoreMatrix = ResidueProperties + .getScoreMatrix(sm = (nucleotides ? "DNA" : "BLOSUM62")); } details.append("PCA calculation using " + sm + " sequence similarity matrix\n========\n\n"); - ii = 0; - while ((ii < s.length) && (s[ii] != null)) - { - bs2[ii] = new BinarySequence(s[ii], nucleotides); - if (smtrx != null) - { - try - { - bs2[ii].matrixEncode(smtrx); - } catch (InvalidSequenceTypeException x) - { - details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n"); - } - } - ii++; - } - - int count = 0; - while ((count < bs.length) && (bs[count] != null)) - { - count++; - } - - double[][] seqmat = new double[count][]; - double[][] seqmat2 = new double[count][]; - - int i = 0; - while (i < count) - { - seqmat[i] = bs[i].getDBinary(); - seqmat2[i] = bs2[i].getDBinary(); - i++; - } - - /* - * using a SparseMatrix to hold the encoded sequences matrix - * greatly speeds up matrix multiplication as these are mostly zero - */ - m = new SparseMatrix(seqmat); - m2 = new Matrix(seqmat2); - - } - - /** - * Returns the matrix used in PCA calculation - * - * @return java.math.Matrix object - */ - - public MatrixI getM() - { - return m; } /** @@ -188,9 +122,9 @@ public class PCA implements Runnable */ public float[][] getComponents(int l, int n, int mm, float factor) { - float[][] out = new float[m.height()][3]; + float[][] out = new float[getHeight()][3]; - for (int i = 0; i < m.height(); i++) + for (int i = 0; i < getHeight(); i++) { out[i][0] = (float) component(i, l) * factor; out[i][1] = (float) component(i, n) * factor; @@ -211,9 +145,9 @@ public class PCA implements Runnable public double[] component(int n) { // n = index of eigenvector - double[] out = new double[m.height()]; + double[] out = new double[getHeight()]; - for (int i = 0; i < m.height(); i++) + for (int i = 0; i < out.length; i++) { out[i] = component(i, n); } @@ -275,12 +209,10 @@ public class PCA implements Runnable details.append("PCA Calculation Mode is " + (jvCalcMode ? "Jalview variant" : "Original SeqSpace") + "\n"); - MatrixI mt = m.transpose(); - details.append(" --- OrigT * Orig ---- \n"); - - eigenvector = mt.preMultiply(jvCalcMode ? m2 : m); + eigenvector = scoreMatrix.computePairwiseScores(seqs); + details.append(" --- OrigT * Orig ---- \n"); eigenvector.print(ps, "%8.2f"); symm = eigenvector.copy(); @@ -324,4 +256,16 @@ public class PCA implements Runnable { this.jvCalcMode = calcMode; } + + /** + * Answers the N dimensions of the NxN PCA matrix. This is the number of + * sequences involved in the pairwise score calculation. + * + * @return + */ + public int getHeight() + { + // TODO can any of seqs[] be null? + return seqs.length; + } }