import java.io.*;
import jalview.datamodel.*;
+import jalview.datamodel.BinarySequence.InvalidSequenceTypeException;
import jalview.math.*;
+import jalview.schemes.ResidueProperties;
+import jalview.schemes.ScoreMatrix;
/**
* Performs Principal Component Analysis on given sequences
/**
* Creates a new PCA object.
- *
+ * Treats the input as protein: delegates to the two-argument constructor
+ * with nucleotides=false, so the BLOSUM62 matrix is used to generate the
+ * sequence similarity matrices.
* @param s
- * Set of sequences to perform PCA on
+ * Set of amino acid sequences to perform PCA on
*/
public PCA(String[] s)
{
+ this(s, false); // false = protein input, scored with BLOSUM62
+ }
+
+ /**
+ * Creates a new PCA object.
+ * Uses the BLOSUM62 matrix to generate sequence similarity matrices unless
+ * nucleotides is true, in which case the DNA matrix is used instead.
+ * @param s
+ * Set of sequences to perform PCA on
+ * @param nucleotides if true, uses standard DNA/RNA matrix for sequence similarity calculation.
+ */
+ public PCA(String[] s, boolean nucleotides)
+ {
BinarySequence[] bs = new BinarySequence[s.length];
int ii = 0;
while ((ii < s.length) && (s[ii] != null))
{
- bs[ii] = new BinarySequence(s[ii]);
+ bs[ii] = new BinarySequence(s[ii],nucleotides);
bs[ii].encode();
ii++;
}
BinarySequence[] bs2 = new BinarySequence[s.length];
ii = 0;
+ String sm=nucleotides ? "DNA" : "BLOSUM62";
+ ScoreMatrix smtrx=ResidueProperties.getScoreMatrix(sm);
+ details.append("PCA calculation using "+sm+" sequence similarity matrix\n========\n\n");
+
while ((ii < s.length) && (s[ii] != null))
{
- bs2[ii] = new BinarySequence(s[ii]);
- bs2[ii].blosumEncode();
+ bs2[ii] = new BinarySequence(s[ii], nucleotides);
+ if (smtrx != null)
+ {
+ try
+ {
+ bs2[ii].matrixEncode(smtrx);
+ } catch (InvalidSequenceTypeException x)
+ {
+ details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n");
+ }
+ }
ii++;
}
AlignViewport av;
+ AlignmentView seqstrings;
+
SequenceI[] seqs;
- AlignmentView seqstrings;
+
+ /**
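+ * true when the alignment is nucleotide; passed to PCA to select the
+ * nucleotide rather than the protein similarity matrix. Despite the name,
+ * no identity matrix is involved in the code visible here.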
+ */
+ private boolean useidentity=false;
+
public PCAPanel(AlignViewport av)
{
this.av = av;
seqstrings = av.getAlignmentView(av.getSelectionGroup() != null);
+ useidentity=av.getAlignment().isNucleotide();
if (av.getSelectionGroup() == null)
{
seqs = av.getAlignment().getSequencesArray();
*/
public void run()
{
- pca = new PCA(seqstrings.getSequenceStrings(' '));
+ pca = new PCA(seqstrings.getSequenceStrings(' '), useidentity);
pca.run();
// Now find the component coordinates
import jalview.schemes.*;
/**
- * DOCUMENT ME!
+ * Encode a sequence as a numeric vector using either classic residue binary
+ * encoding or convolved with residue substitution matrix.
*
* @author $author$
* @version $Revision$
*/
public class BinarySequence extends Sequence
{
+ /**
+ * Thrown when a score matrix is applied to a sequence of the wrong type,
+ * e.g. a nucleotide matrix used to encode a protein sequence.
+ *
+ * Declared static: the exception uses no state of the enclosing sequence,
+ * so it should not carry a hidden reference to it.
+ */
+ public static class InvalidSequenceTypeException extends Exception
+ {
+ // Exception is Serializable, so pin the serialized form explicitly.
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * @param string
+ * description of the sequence type / matrix mismatch
+ */
+ public InvalidSequenceTypeException(String string)
+ {
+ super(string);
+ }
+
+ }
+
int[] binary;
double[] dbinary;
+ boolean isNa=false;
/**
* Creates a new BinarySequence object.
*
* @param s
* DOCUMENT ME!
+ * (presumably the residue string to encode; it is handed unchanged to
+ * the Sequence superclass constructor)
+ * @param isNa
+ * true if s is a nucleotide sequence - selects the nucleotide rather
+ * than the protein symbol index and dimension used when encoding
*/
- public BinarySequence(String s)
+ public BinarySequence(String s, boolean isNa)
{
super("", s, 0, s.length());
+ this.isNa=isNa;
}
/**
- * DOCUMENT ME!
+ * Replaces dbinary with a fresh, zero-filled vector holding nores entries
+ * for each position of this sequence.
+ * nores is ResidueProperties.maxNucleotideIndex when isNa is set, otherwise
+ * ResidueProperties.maxProteinIndex.
+ * NOTE(review): those constants are documented as the maximum (gap) index,
+ * so a symbol whose index equals nores does not fit in the vector and the
+ * encoders clamp it to nores-1 - confirm this is intended rather than an
+ * off-by-one in the dimension.
+ * @return nores - dimension of sequence symbol encoding for this sequence
*/
- public void encode()
+ private int initMatrixGetNoRes()
{
+ int nores=(isNa) ? ResidueProperties.maxNucleotideIndex : ResidueProperties.maxProteinIndex;
// Set all matrix to 0
- dbinary = new double[getSequence().length * 21];
-
- int nores = 21;
+ dbinary = new double[getSequence().length * nores];
for (int i = 0; i < dbinary.length; i++)
{
dbinary[i] = 0.0;
}
-
+ return nores;
+ }
+ /**
+ * Selects the symbol-to-index lookup table matching this sequence's type.
+ * @return ResidueProperties.nucleotideIndex when isNa is set, otherwise
+ * ResidueProperties.aaIndex
+ */
+ private int[] getSymbolmatrix()
+ {
+ if (isNa)
+ {
+ return ResidueProperties.nucleotideIndex;
+ }
+ return ResidueProperties.aaIndex;
+ }
+ /**
+ * DOCUMENT ME!
+ */
+ public void encode()
+ {
+ int nores=initMatrixGetNoRes();
+ final int[] sindex=getSymbolmatrix();
for (int i = 0; i < getSequence().length; i++)
{
- int aanum = 20;
+ int aanum = nores-1;
try
{
- aanum = ResidueProperties.aaIndex[getCharAt(i)];
+ aanum = sindex[getCharAt(i)];
} catch (NullPointerException e)
{
- aanum = 20;
+ aanum = nores-1;
}
- if (aanum > 20)
+ if (aanum >= nores)
{
- aanum = 20;
+ aanum = nores-1;
}
dbinary[(i * nores) + aanum] = 1.0;
*
* @param matrix
*/
- public void matrixEncode(ScoreMatrix matrix)
+ public void matrixEncode(final ScoreMatrix matrix) throws InvalidSequenceTypeException
{
+ // The matrix type must agree with this sequence's type (isNa): the symbol
+ // index table chosen below is picked from the matrix, while the encoding
+ // dimension is picked from isNa, so a mismatch would mix the two alphabets.
+ if (isNa!=matrix.isDNA())
+ {
+ // message fix: a space was missing before "sequences"
+ throw new InvalidSequenceTypeException("matrix "
+ + matrix.getClass().getCanonicalName()
+ + " is not a valid matrix for "
+ + (isNa ? "nucleotide" : "protein") + " sequences");
+ }
matrixEncode(matrix.isDNA() ? ResidueProperties.nucleotideIndex
: ResidueProperties.aaIndex, matrix.getMatrix());
}
- /**
- * DOCUMENT ME!
- */
- public void blosumEncode()
- {
- matrixEncode(ResidueProperties.aaIndex, ResidueProperties.getBLOSUM62());
- }
-
- private void matrixEncode(int[] aaIndex, int[][] matrix)
+ private void matrixEncode(final int[] aaIndex, final int[][] matrix)
{
// Set all matrix to 0
- dbinary = new double[getSequence().length * 21];
+ // dbinary = new double[getSequence().length * 21];
- int nores = 21;
+ int nores = initMatrixGetNoRes();
// for (int i = 0; i < dbinary.length; i++) {
// dbinary[i] = 0.0;
// }
- for (int i = 0; i < getSequence().length; i++)
+ for (int i = 0,iSize=getSequence().length; i<iSize; i++)
{
- int aanum = 20;
+ int aanum = nores-1;
try
{
aanum = aaIndex[getCharAt(i)];
} catch (NullPointerException e)
{
- aanum = 20;
+ aanum = nores-1;
}
- if (aanum > 20)
+ if (aanum >=nores)
{
- aanum = 20;
+ aanum = nores-1;
}
- // Do the blosum thing
+ // Do the blosum^H^H^H^H^H score matrix summation thing
- for (int j = 0; j < 20; j++)
+ for (int j = 0; j < nores; j++)
{
dbinary[(i * nores) + j] = matrix[aanum][j];
}
SequenceI[] seqs;
/**
+ * true when the PCA should be calculated with the nucleotide similarity
+ * matrix, false for the protein matrix. Initialised from the alignment type.
+ */
+ private boolean nucleotide=false;
+
+ /**
* Creates a new PCAPanel object.
*
* @param av
boolean sameLength = true;
seqstrings = av.getAlignmentView(av.getSelectionGroup() != null);
+ nucleotide=av.getAlignment().isNucleotide();
if (av.getSelectionGroup() == null)
{
seqs = av.getAlignment().getSequencesArray();
return;
}
- Desktop.addInternalFrame(this, "Principal component analysis", 400, 400);
-
PaintRefresher.Register(this, av.getSequenceSetId());
rc = new RotatableCanvas(ap);
{
try
{
- pca = new PCA(seqstrings.getSequenceStrings(' '));
+ calcSettings.setEnabled(false);
+ pca = new PCA(seqstrings.getSequenceStrings(' '), nucleotide);
pca.run();
// Now find the component coordinates
}
rc.setPoints(points, pca.getM().rows);
- rc.repaint();
-
- addKeyListener(rc);
+ // rc.invalidate();
+ nuclSetting.setSelected(nucleotide);
+ protSetting.setSelected(!nucleotide);
} catch (OutOfMemoryError er)
{
new OOMWarning("calculating PCA", er);
-
+ return;
+ }
+ calcSettings.setEnabled(true);
+ repaint();
+ if (getParent()==null)
+ {
+ addKeyListener(rc);
+ Desktop.addInternalFrame(this, "Principal component analysis", 400, 400);
}
-
}
-
+ /**
+ * Switches the calculation to the nucleotide matrix and re-runs this
+ * panel's run() method on a new thread.
+ */
+ @Override
+ protected void nuclSetting_actionPerfomed(ActionEvent arg0)
+ {
+ nucleotide=true;
+ new Thread(this).start();
+ }
+ /**
+ * Switches the calculation to the protein matrix and re-runs this
+ * panel's run() method on a new thread.
+ */
+ @Override
+ protected void protSetting_actionPerfomed(ActionEvent arg0)
+ {
+ nucleotide=false;
+ new Thread(this).start();
+ }
/**
* DOCUMENT ME!
*/
repaint();
}
+ boolean first=true; // true until setPoints has completed once
public void setPoints(Vector points, int npoint)
{
this.points = points;
this.npoint = npoint;
+ if (first) { // tooltip registration and delays are set up on the first call only
ToolTipManager.sharedInstance().registerComponent(this);
ToolTipManager.sharedInstance().setInitialDelay(0);
ToolTipManager.sharedInstance().setDismissDelay(10000);
-
+ }
prefsize = getPreferredSize();
orig = new float[npoint][3];
findWidth();
scale = findScale();
-
+ if (first) { // attach the mouse listeners once, so repeated calls do not add duplicates
+
addMouseListener(this);
addMouseMotionListener(this);
-
+ }
+ first=false; // later calls only refresh the point data and scaling
}
public void initAxes()
protected JMenu associateViewsMenu = new JMenu();
+ protected JMenu calcSettings=new JMenu();
+ protected JCheckBoxMenuItem nuclSetting=new JCheckBoxMenuItem();
+ protected JCheckBoxMenuItem protSetting=new JCheckBoxMenuItem();
public GPCAPanel()
{
try
}
});
associateViewsMenu.setText("Associate Nodes With");
+ calcSettings.setText("Change Parameters");
+ nuclSetting.setText("Nucleotide matrix");
+ protSetting.setText("Protein matrix");
+ nuclSetting.addActionListener(new ActionListener()
+ {
+
+ @Override
+ public void actionPerformed(ActionEvent arg0)
+ {
+ nuclSetting_actionPerfomed(arg0);
+ }
+ });
+ protSetting.addActionListener(new ActionListener()
+ {
+
+ @Override
+ public void actionPerformed(ActionEvent arg0)
+ {
+ protSetting_actionPerfomed(arg0);
+ }
+ });calcSettings.add(nuclSetting);
+ calcSettings.add(protSetting);
+
+
this.getContentPane().add(jPanel2, BorderLayout.SOUTH);
jPanel2.add(jLabel1, null);
jPanel2.add(xCombobox, null);
jPanel2.add(zCombobox, null);
jMenuBar1.add(fileMenu);
jMenuBar1.add(viewMenu);
+ jMenuBar1.add(calcSettings);
fileMenu.add(saveMenu);
fileMenu.add(outputValues);
fileMenu.add(print);
viewMenu.add(associateViewsMenu);
}
+ protected void protSetting_actionPerfomed(ActionEvent arg0)
+ {
+ // Empty hook called when the "Protein matrix" menu item fires; subclasses may override it.
+
+ }
+
+ protected void nuclSetting_actionPerfomed(ActionEvent arg0)
+ {
+ // Empty hook called when the "Nucleotide matrix" menu item fires; subclasses may override it.
+
+ }
+
protected void outputPoints_actionPerformed(ActionEvent e)
{
// TODO Auto-generated method stub
// extend subt. matrices
}
+ /**
+ * maximum (gap) index for matrices involving protein alphabet
+ */
+ public final static int maxProteinIndex=23;
+ /**
+ * maximum (gap) index for matrices involving nucleotide alphabet
+ */
+ public final static int maxNucleotideIndex=10;
+
static
{
nucleotideIndex = new int[255];
* new Color(60, 136, 238), // U Color.white, // I Color.white, // X
* Color.white, // R Color.white, // Y Color.white, // N Color.white, // Gap
*/
+
+ // JBPNote: matrix patched for T/U equivalence when working with DNA or RNA,
+ // so that sequences from mixed nucleotide sets can be equated.
+ // T and U are treated identically. R matches A and G weakly; Y matches C, T and U weakly.
+ // N matches every other symbol weakly.
+ //
static final int[][] DNA =
{
- { 10, -8, -8, -8, 1, 0, 0, 0, 0, 0, 1 }, // C
- { -8, 10, -8, -8, 1, 0, 0, 0, 0, 0, 1 }, // T
- { -8, -8, 10, -8, 1, 0, 0, 0, 0, 0, 1 }, // A
- { -8, -8, -8, 10, 1, 0, 0, 0, 0, 0, 1 }, // G
- { 1, 1, 1, 1, 10, 0, 0, 0, 0, 0, 1 }, // -
- { 1, 1, 1, 1, 1, 10, 0, 0, 0, 0, 1 }, // -
- { 1, 1, 1, 1, 1, 0, 10, 0, 0, 0, 1 }, // -
- { 1, 1, 1, 1, 1, 0, 0, 10, 0, 0, 1 }, // -
- { 1, 1, 1, 1, 1, 0, 0, 0, 10, 0, 1 }, // -
- { 1, 1, 1, 1, 1, 0, 0, 0, 0, 10, 1 }, // -
- { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // -
+ { 10, -8, -8, -8, -8, 1, 1, -8, 1, 1, 1 }, // C (columns follow the row order: C T A G U I X R Y N -)
+ { -8, 10, -8, -8, 10, 1, 1, -8, 1, 1, 1 }, // T
+ { -8, -8, 10, -8, -8, 1, 1, 1, -8, 1, 1 }, // A
+ { -8, -8, -8, 10, -8, 1, 1, 1, -8, 1, 1 }, // G
+ { -8, 10, -8, -8, 10, 1, 1, -8, 1, 1, 1 }, // U
+ { 1, 1, 1, 1, 1, 10, 0, 0, 0, 1, 1 }, // I
+ { 1, 1, 1, 1, 1, 0, 10, 0, 0, 1, 1 }, // X
+ { -8, -8, 1, 1, -8, 0, 0, 10, 0, 1, 1 }, // R
+ { 1, 1, -8, -8, 1, 0, 0, 0, 10, 1, 1 }, // Y
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1 }, // N
+ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // -
};
- /**
+/**
* register matrices in list
*/
static
scoreMatrices.put("BLOSUM62", new ScoreMatrix("BLOSUM62", BLOSUM62, 0));
scoreMatrices.put("PAM250", new ScoreMatrix("PAM250", PAM250, 0));
scoreMatrices.put("DNA", new ScoreMatrix("DNA", DNA, 1));
+
}
public static final Color[] pidColours =