From 6d94bfcf539e558b09a08102b228e65e670f77d8 Mon Sep 17 00:00:00 2001 From: jprocter Date: Thu, 8 Dec 2011 17:32:30 +0000 Subject: [PATCH] (JAL-1013) allow sequence type to be specified enabling an appropriate substitution model to be selected for calculating PCA. --- src/jalview/analysis/PCA.java | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index ff4ed38..6498b6d 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,7 +20,10 @@ package jalview.analysis; import java.io.*; import jalview.datamodel.*; +import jalview.datamodel.BinarySequence.InvalidSequenceTypeException; import jalview.math.*; +import jalview.schemes.ResidueProperties; +import jalview.schemes.ScoreMatrix; /** * Performs Principal Component Analysis on given sequences @@ -44,19 +47,31 @@ public class PCA implements Runnable /** * Creates a new PCA object. - * + * By default, uses blosum62 matrix to generate sequence similarity matrices * @param s - * Set of sequences to perform PCA on + * Set of amino acid sequences to perform PCA on */ public PCA(String[] s) { + this(s, false); + } + + /** + * Creates a new PCA object. + * By default, uses blosum62 matrix to generate sequence similarity matrices + * @param s + * Set of sequences to perform PCA on + * @param nucleotides if true, uses standard DNA/RNA matrix for sequence similarity calculation. + */ + public PCA(String[] s, boolean nucleotides) + { BinarySequence[] bs = new BinarySequence[s.length]; int ii = 0; while ((ii < s.length) && (s[ii] != null)) { - bs[ii] = new BinarySequence(s[ii]); + bs[ii] = new BinarySequence(s[ii],nucleotides); bs[ii].encode(); ii++; } @@ -64,10 +79,23 @@ public class PCA implements Runnable BinarySequence[] bs2 = new BinarySequence[s.length]; ii = 0; + String sm=nucleotides ? "DNA" : "BLOSUM62"; + ScoreMatrix smtrx=ResidueProperties.getScoreMatrix(sm); + details.append("PCA calculation using "+sm+" sequence similarity matrix\n========\n\n"); + while ((ii < s.length) && (s[ii] != null)) { - bs2[ii] = new BinarySequence(s[ii]); - bs2[ii].blosumEncode(); + bs2[ii] = new BinarySequence(s[ii], nucleotides); + if (smtrx != null) + { + try + { + bs2[ii].matrixEncode(smtrx); + } catch (InvalidSequenceTypeException x) + { + details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n"); + } + } ii++; } -- 1.7.10.2