/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see .
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
import java.io.*;
import jalview.datamodel.*;
import jalview.datamodel.BinarySequence.InvalidSequenceTypeException;
import jalview.math.*;
import jalview.schemes.ResidueProperties;
import jalview.schemes.ScoreMatrix;
/**
* Performs Principal Component Analysis on given sequences
*
* @author $author$
* @version $Revision$
*/
public class PCA implements Runnable
{
Matrix m;
Matrix symm;
Matrix m2;
double[] eigenvalue;
Matrix eigenvector;
StringBuffer details = new StringBuffer();
/**
* Creates a new PCA object. By default, uses blosum62 matrix to generate
* sequence similarity matrices
*
* @param s
* Set of amino acid sequences to perform PCA on
*/
public PCA(String[] s)
{
this(s, false);
}
/**
* Creates a new PCA object. By default, uses blosum62 matrix to generate
* sequence similarity matrices
*
* @param s
* Set of sequences to perform PCA on
* @param nucleotides
* if true, uses standard DNA/RNA matrix for sequence similarity
* calculation.
*/
public PCA(String[] s, boolean nucleotides)
{
this(s, nucleotides, null);
}
public PCA(String[] s, boolean nucleotides, String s_m)
{
BinarySequence[] bs = new BinarySequence[s.length];
int ii = 0;
while ((ii < s.length) && (s[ii] != null))
{
bs[ii] = new BinarySequence(s[ii], nucleotides);
bs[ii].encode();
ii++;
}
BinarySequence[] bs2 = new BinarySequence[s.length];
ii = 0;
ScoreMatrix smtrx = null;
String sm = s_m;
if (sm != null)
{
smtrx = ResidueProperties.getScoreMatrix(sm);
}
if (smtrx == null)
{
// either we were given a non-existent score matrix or a scoremodel that
// isn't based on a pairwise symbol score matrix
smtrx = ResidueProperties.getScoreMatrix(sm = (nucleotides ? "DNA"
: "BLOSUM62"));
}
details.append("PCA calculation using " + sm
+ " sequence similarity matrix\n========\n\n");
while ((ii < s.length) && (s[ii] != null))
{
bs2[ii] = new BinarySequence(s[ii], nucleotides);
if (smtrx != null)
{
try
{
bs2[ii].matrixEncode(smtrx);
} catch (InvalidSequenceTypeException x)
{
details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n");
}
}
ii++;
}
// System.out.println("Created binary encoding");
// printMemory(rt);
int count = 0;
while ((count < bs.length) && (bs[count] != null))
{
count++;
}
double[][] seqmat = new double[count][bs[0].getDBinary().length];
double[][] seqmat2 = new double[count][bs2[0].getDBinary().length];
int i = 0;
while (i < count)
{
seqmat[i] = bs[i].getDBinary();
seqmat2[i] = bs2[i].getDBinary();
i++;
}
// System.out.println("Created array");
// printMemory(rt);
// System.out.println(" --- Original matrix ---- ");
m = new Matrix(seqmat, count, bs[0].getDBinary().length);
m2 = new Matrix(seqmat2, count, bs2[0].getDBinary().length);
}
/**
* Returns the matrix used in PCA calculation
*
* @return java.math.Matrix object
*/
public Matrix getM()
{
return m;
}
/**
* Returns Eigenvalue
*
* @param i
* Index of diagonal within matrix
*
* @return Returns value of diagonal from matrix
*/
public double getEigenvalue(int i)
{
return eigenvector.d[i];
}
/**
* DOCUMENT ME!
*
* @param l
* DOCUMENT ME!
* @param n
* DOCUMENT ME!
* @param mm
* DOCUMENT ME!
* @param factor
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
public float[][] getComponents(int l, int n, int mm, float factor)
{
float[][] out = new float[m.rows][3];
for (int i = 0; i < m.rows; i++)
{
out[i][0] = (float) component(i, l) * factor;
out[i][1] = (float) component(i, n) * factor;
out[i][2] = (float) component(i, mm) * factor;
}
return out;
}
/**
* DOCUMENT ME!
*
* @param n
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
public double[] component(int n)
{
// n = index of eigenvector
double[] out = new double[m.rows];
for (int i = 0; i < m.rows; i++)
{
out[i] = component(i, n);
}
return out;
}
/**
* DOCUMENT ME!
*
* @param row
* DOCUMENT ME!
* @param n
* DOCUMENT ME!
*
* @return DOCUMENT ME!
*/
double component(int row, int n)
{
double out = 0.0;
for (int i = 0; i < symm.cols; i++)
{
out += (symm.value[row][i] * eigenvector.value[i][n]);
}
return out / eigenvector.d[n];
}
public String getDetails()
{
return details.toString();
}
/**
* DOCUMENT ME!
*/
public void run()
{
PrintStream ps = new PrintStream(System.out)
{
public void print(String x)
{
details.append(x);
}
public void println()
{
details.append("\n");
}
};
try
{
details.append("PCA Calculation Mode is "
+ (jvCalcMode ? "Jalview variant" : "Original SeqSpace")
+ "\n");
Matrix mt = m.transpose();
details.append(" --- OrigT * Orig ---- \n");
if (!jvCalcMode)
{
eigenvector = mt.preMultiply(m); // standard seqspace comparison matrix
}
else
{
eigenvector = mt.preMultiply(m2); // jalview variation on seqsmace
// method
}
eigenvector.print(ps);
symm = eigenvector.copy();
eigenvector.tred();
details.append(" ---Tridiag transform matrix ---\n");
details.append(" --- D vector ---\n");
eigenvector.printD(ps);
ps.println();
details.append("--- E vector ---\n");
eigenvector.printE(ps);
ps.println();
// Now produce the diagonalization matrix
eigenvector.tqli();
} catch (Exception q)
{
q.printStackTrace();
details.append("\n*** Unexpected exception when performing PCA ***\n"
+ q.getLocalizedMessage());
details.append("*** Matrices below may not be fully diagonalised. ***\n");
}
details.append(" --- New diagonalization matrix ---\n");
eigenvector.print(ps);
details.append(" --- Eigenvalues ---\n");
eigenvector.printD(ps);
ps.println();
/*
* for (int seq=0;seq