X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Fjalview%2Fanalysis%2FPCA.java;h=42a168dab78a24d40c6f572fd4e10d404f6dcaab;hb=85b18307da5f85a9cb5c13bb6be97eaf2c7f7965;hp=1a4060ced77e9bbf51eb17846276c7be8fe3aa88;hpb=07394c1c2d9d4ae05c85cd6d9644e4d17f2818a2;p=jalview.git diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 1a4060c..42a168d 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,10 +20,10 @@ */ package jalview.analysis; -import jalview.math.Matrix; +import jalview.api.analysis.ScoreModelI; +import jalview.api.analysis.SimilarityParamsI; +import jalview.datamodel.AlignmentView; import jalview.math.MatrixI; -import jalview.schemes.ResidueProperties; -import jalview.schemes.ScoreMatrix; import java.io.PrintStream; @@ -32,8 +32,6 @@ import java.io.PrintStream; */ public class PCA implements Runnable { - boolean jvCalcMode = true; - MatrixI symm; double[] eigenvalue; @@ -42,108 +40,20 @@ public class PCA implements Runnable StringBuilder details = new StringBuilder(1024); - private String[] seqs; + final private AlignmentView seqs; - private ScoreMatrix scoreMatrix; + private ScoreModelI scoreModel; - /** - * Creates a new PCA object. By default, uses blosum62 matrix to generate - * sequence similarity matrices - * - * @param s - * Set of amino acid sequences to perform PCA on - */ - public PCA(String[] s) - { - this(s, false); - } + private SimilarityParamsI similarityParams; - /** - * Creates a new PCA object. By default, uses blosum62 matrix to generate - * sequence similarity matrices - * - * @param s - * Set of sequences to perform PCA on - * @param nucleotides - * if true, uses standard DNA/RNA matrix for sequence similarity - * calculation. - */ - public PCA(String[] s, boolean nucleotides) - { - this(s, nucleotides, null); - } - - public PCA(String[] s, boolean nucleotides, String s_m) + public PCA(AlignmentView s, ScoreModelI sm, SimilarityParamsI options) { this.seqs = s; + this.similarityParams = options; + this.scoreModel = sm; - // BinarySequence[] bs = new BinarySequence[s.length]; - // int ii = 0; - // - // while ((ii < s.length) && (s[ii] != null)) - // { - // bs[ii] = new BinarySequence(s[ii], nucleotides); - // bs[ii].encode(); - // ii++; - // } - // - // BinarySequence[] bs2 = new BinarySequence[s.length]; - scoreMatrix = null; - String sm = s_m; - if (sm != null) - { - scoreMatrix = ResidueProperties.getScoreMatrix(sm); - } - if (scoreMatrix == null) - { - // either we were given a non-existent score matrix or a scoremodel that - // isn't based on a pairwise symbol score matrix - scoreMatrix = ResidueProperties - .getScoreMatrix(sm = (nucleotides ? "DNA" : "BLOSUM62")); - } - details.append("PCA calculation using " + sm + details.append("PCA calculation using " + sm.getName() + " sequence similarity matrix\n========\n\n"); - // ii = 0; - // while ((ii < s.length) && (s[ii] != null)) - // { - // bs2[ii] = new BinarySequence(s[ii], nucleotides); - // if (scoreMatrix != null) - // { - // try - // { - // bs2[ii].matrixEncode(scoreMatrix); - // } catch (InvalidSequenceTypeException x) - // { - // details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n"); - // } - // } - // ii++; - // } - // - // int count = 0; - // while ((count < bs.length) && (bs[count] != null)) - // { - // count++; - // } - // - // double[][] seqmat = new double[count][]; - // double[][] seqmat2 = new double[count][]; - // - // int i = 0; - // while (i < count) - // { - // seqmat[i] = bs[i].getDBinary(); - // seqmat2[i] = bs2[i].getDBinary(); - // i++; - // } - // - // /* - // * using a SparseMatrix to hold the encoded sequences matrix - // * greatly speeds up matrix multiplication as these are mostly zero - // */ - // m = new SparseMatrix(seqmat); - // m2 = new Matrix(seqmat2); - } /** @@ -259,13 +169,7 @@ public class PCA implements Runnable // long now = System.currentTimeMillis(); try { - details.append("PCA Calculation Mode is " - + (jvCalcMode ? "Jalview variant" : "Original SeqSpace") - + "\n"); - - // MatrixI mt = m.transpose(); - // eigenvector = mt.preMultiply(jvCalcMode ? m2 : m); - eigenvector = computePairwiseScores(); + eigenvector = scoreModel.findSimilarities(seqs, similarityParams); details.append(" --- OrigT * Orig ---- \n"); eigenvector.print(ps, "%8.2f"); @@ -289,7 +193,8 @@ public class PCA implements Runnable q.printStackTrace(); details.append("\n*** Unexpected exception when performing PCA ***\n" + q.getLocalizedMessage()); - details.append("*** Matrices below may not be fully diagonalised. ***\n"); + details.append( + "*** Matrices below may not be fully diagonalised. ***\n"); } details.append(" --- New diagonalization matrix ---\n"); @@ -308,53 +213,6 @@ public class PCA implements Runnable } /** - * Computes an NxN matrix where N is the number of sequences, and entry [i, j] - * is sequence[i] pairwise multiplied with sequence[j], as a sum of scores - * computed using the current score matrix. For example - * - */ - MatrixI computePairwiseScores() - { - double[][] values = new double[seqs.length][]; - for (int row = 0; row < seqs.length; row++) - { - values[row] = new double[seqs.length]; - for (int col = 0; col < seqs.length; col++) - { - int total = 0; - int width = Math.min(seqs[row].length(), seqs[col].length()); - for (int i = 0; i < width; i++) - { - char c1 = seqs[row].charAt(i); - char c2 = seqs[col].charAt(i); - int score = scoreMatrix.getPairwiseScore(c1, c2); - total += score; - } - values[row][col] = total; - } - } - return new Matrix(values); - } - - public void setJvCalcMode(boolean calcMode) - { - this.jvCalcMode = calcMode; - } - - /** * Answers the N dimensions of the NxN PCA matrix. This is the number of * sequences involved in the pairwise score calculation. * @@ -363,6 +221,6 @@ public class PCA implements Runnable public int getHeight() { // TODO can any of seqs[] be null? - return seqs.length; + return seqs.getSequences().length; } }