From: jprocter Date: Fri, 15 Jun 2012 08:58:07 +0000 (+0100) Subject: Merge branch 'JAL-1013_pca_rna_dna' into develop X-Git-Tag: Jalview_2_9~463 X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=9793a4e49836be392513c2a51e9fc71a0b0eb417;hp=1a4b77f5614636b009e9a4cf2924ad00f4e8cb79;p=jalview.git Merge branch 'JAL-1013_pca_rna_dna' into develop --- diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 47f2d0d..b001549 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,7 +20,10 @@ package jalview.analysis; import java.io.*; import jalview.datamodel.*; +import jalview.datamodel.BinarySequence.InvalidSequenceTypeException; import jalview.math.*; +import jalview.schemes.ResidueProperties; +import jalview.schemes.ScoreMatrix; /** * Performs Principal Component Analysis on given sequences @@ -44,19 +47,31 @@ public class PCA implements Runnable /** * Creates a new PCA object. - * + * By default, uses blosum62 matrix to generate sequence similarity matrices * @param s - * Set of sequences to perform PCA on + * Set of amino acid sequences to perform PCA on */ public PCA(String[] s) { + this(s, false); + } + + /** + * Creates a new PCA object. + * By default, uses blosum62 matrix to generate sequence similarity matrices + * @param s + * Set of sequences to perform PCA on + * @param nucleotides if true, uses standard DNA/RNA matrix for sequence similarity calculation. + */ + public PCA(String[] s, boolean nucleotides) + { BinarySequence[] bs = new BinarySequence[s.length]; int ii = 0; while ((ii < s.length) && (s[ii] != null)) { - bs[ii] = new BinarySequence(s[ii]); + bs[ii] = new BinarySequence(s[ii],nucleotides); bs[ii].encode(); ii++; } @@ -64,10 +79,23 @@ public class PCA implements Runnable BinarySequence[] bs2 = new BinarySequence[s.length]; ii = 0; + String sm=nucleotides ? "DNA" : "BLOSUM62"; + ScoreMatrix smtrx=ResidueProperties.getScoreMatrix(sm); + details.append("PCA calculation using "+sm+" sequence similarity matrix\n========\n\n"); + while ((ii < s.length) && (s[ii] != null)) { - bs2[ii] = new BinarySequence(s[ii]); - bs2[ii].blosumEncode(); + bs2[ii] = new BinarySequence(s[ii], nucleotides); + if (smtrx != null) + { + try + { + bs2[ii].matrixEncode(smtrx); + } catch (InvalidSequenceTypeException x) + { + details.append("Unexpected mismatch of sequence type and score matrix. Calculation will not be valid!\n\n"); + } + } ii++; } diff --git a/src/jalview/appletgui/PCAPanel.java b/src/jalview/appletgui/PCAPanel.java index 6568b5b..37b4281 100755 --- a/src/jalview/appletgui/PCAPanel.java +++ b/src/jalview/appletgui/PCAPanel.java @@ -36,9 +36,16 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, AlignViewport av; + AlignmentView seqstrings; + SequenceI[] seqs; - AlignmentView seqstrings; + + /** + * use the identity matrix for calculating similarity between sequences. + */ + private boolean useidentity=false; + public PCAPanel(AlignViewport av) { @@ -59,6 +66,7 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, this.av = av; seqstrings = av.getAlignmentView(av.getSelectionGroup() != null); + useidentity=av.getAlignment().isNucleotide(); if (av.getSelectionGroup() == null) { seqs = av.getAlignment().getSequencesArray(); @@ -96,7 +104,7 @@ public class PCAPanel extends EmbmenuFrame implements Runnable, */ public void run() { - pca = new PCA(seqstrings.getSequenceStrings(' ')); + pca = new PCA(seqstrings.getSequenceStrings(' '), useidentity); pca.run(); // Now find the component coordinates diff --git a/src/jalview/datamodel/BinarySequence.java b/src/jalview/datamodel/BinarySequence.java index 04cfdc9..062f6ba 100755 --- a/src/jalview/datamodel/BinarySequence.java +++ b/src/jalview/datamodel/BinarySequence.java @@ -20,58 +20,83 @@ package jalview.datamodel; import jalview.schemes.*; /** - * DOCUMENT ME! + * Encode a sequence as a numeric vector using either classic residue binary + * encoding or convolved with residue substitution matrix. * * @author $author$ * @version $Revision$ */ public class BinarySequence extends Sequence { + public class InvalidSequenceTypeException extends Exception + { + + public InvalidSequenceTypeException(String string) + { + super(string); + } + + } + int[] binary; double[] dbinary; + boolean isNa=false; /** * Creates a new BinarySequence object. * * @param s * DOCUMENT ME! */ - public BinarySequence(String s) + public BinarySequence(String s, boolean isNa) { super("", s, 0, s.length()); + this.isNa=isNa; } /** - * DOCUMENT ME! + * clear the dbinary matrix + * @return nores - dimension of sequence symbol encoding for this sequence */ - public void encode() + private int initMatrixGetNoRes() { + int nores=(isNa) ? ResidueProperties.maxNucleotideIndex : ResidueProperties.maxProteinIndex; // Set all matrix to 0 - dbinary = new double[getSequence().length * 21]; - - int nores = 21; + dbinary = new double[getSequence().length * nores]; for (int i = 0; i < dbinary.length; i++) { dbinary[i] = 0.0; } - + return nores; + } + private int[] getSymbolmatrix() + { + return (isNa) ? ResidueProperties.nucleotideIndex : ResidueProperties.aaIndex; + } + /** + * DOCUMENT ME! + */ + public void encode() + { + int nores=initMatrixGetNoRes(); + final int[] sindex=getSymbolmatrix(); for (int i = 0; i < getSequence().length; i++) { - int aanum = 20; + int aanum = nores-1; try { - aanum = ResidueProperties.aaIndex[getCharAt(i)]; + aanum = sindex[getCharAt(i)]; } catch (NullPointerException e) { - aanum = 20; + aanum = nores-1; } - if (aanum > 20) + if (aanum >= nores) { - aanum = 20; + aanum = nores-1; } dbinary[(i * nores) + aanum] = 1.0; @@ -83,50 +108,49 @@ public class BinarySequence extends Sequence * * @param matrix */ - public void matrixEncode(ScoreMatrix matrix) + public void matrixEncode(final ScoreMatrix matrix) throws InvalidSequenceTypeException { + if (isNa!=matrix.isDNA()) + { + throw new InvalidSequenceTypeException("matrix " + + matrix.getClass().getCanonicalName() + + " is not a valid matrix for " + + (isNa ? "nucleotide" : "protein") + "sequences"); + } matrixEncode(matrix.isDNA() ? ResidueProperties.nucleotideIndex : ResidueProperties.aaIndex, matrix.getMatrix()); } - /** - * DOCUMENT ME! - */ - public void blosumEncode() - { - matrixEncode(ResidueProperties.aaIndex, ResidueProperties.getBLOSUM62()); - } - - private void matrixEncode(int[] aaIndex, int[][] matrix) + private void matrixEncode(final int[] aaIndex, final int[][] matrix) { // Set all matrix to 0 - dbinary = new double[getSequence().length * 21]; + // dbinary = new double[getSequence().length * 21]; - int nores = 21; + int nores = initMatrixGetNoRes(); // for (int i = 0; i < dbinary.length; i++) { // dbinary[i] = 0.0; // } - for (int i = 0; i < getSequence().length; i++) + for (int i = 0,iSize=getSequence().length; i 20) + if (aanum >=nores) { - aanum = 20; + aanum = nores-1; } - // Do the blosum thing + // Do the blosum^H^H^H^H^H score matrix summation thing - for (int j = 0; j < 20; j++) + for (int j = 0; j < nores; j++) { dbinary[(i * nores) + j] = matrix[aanum][j]; } diff --git a/src/jalview/gui/PCAPanel.java b/src/jalview/gui/PCAPanel.java index fe0c4f9..9dc73b7 100755 --- a/src/jalview/gui/PCAPanel.java +++ b/src/jalview/gui/PCAPanel.java @@ -52,6 +52,11 @@ public class PCAPanel extends GPCAPanel implements Runnable SequenceI[] seqs; /** + * use the identity matrix for calculating similarity between sequences. + */ + private boolean nucleotide=false; + + /** * Creates a new PCAPanel object. * * @param av @@ -67,6 +72,7 @@ public class PCAPanel extends GPCAPanel implements Runnable boolean sameLength = true; seqstrings = av.getAlignmentView(av.getSelectionGroup() != null); + nucleotide=av.getAlignment().isNucleotide(); if (av.getSelectionGroup() == null) { seqs = av.getAlignment().getSequencesArray(); @@ -100,8 +106,6 @@ public class PCAPanel extends GPCAPanel implements Runnable return; } - Desktop.addInternalFrame(this, "Principal component analysis", 400, 400); - PaintRefresher.Register(this, av.getSequenceSetId()); rc = new RotatableCanvas(ap); @@ -129,7 +133,8 @@ public class PCAPanel extends GPCAPanel implements Runnable { try { - pca = new PCA(seqstrings.getSequenceStrings(' ')); + calcSettings.setEnabled(false); + pca = new PCA(seqstrings.getSequenceStrings(' '), nucleotide); pca.run(); // Now find the component coordinates @@ -167,18 +172,37 @@ public class PCAPanel extends GPCAPanel implements Runnable } rc.setPoints(points, pca.getM().rows); - rc.repaint(); - - addKeyListener(rc); + // rc.invalidate(); + nuclSetting.setSelected(nucleotide); + protSetting.setSelected(!nucleotide); } catch (OutOfMemoryError er) { new OOMWarning("calculating PCA", er); - + return; + } + calcSettings.setEnabled(true); + repaint(); + if (getParent()==null) + { + addKeyListener(rc); + Desktop.addInternalFrame(this, "Principal component analysis", 400, 400); } - } - + @Override + protected void nuclSetting_actionPerfomed(ActionEvent arg0) + { + nucleotide=true; + Thread worker = new Thread(this); + worker.start(); + } + @Override + protected void protSetting_actionPerfomed(ActionEvent arg0) + { + nucleotide=false; + Thread worker = new Thread(this); + worker.start(); + } /** * DOCUMENT ME! */ diff --git a/src/jalview/gui/RotatableCanvas.java b/src/jalview/gui/RotatableCanvas.java index f3312c3..458a29d 100755 --- a/src/jalview/gui/RotatableCanvas.java +++ b/src/jalview/gui/RotatableCanvas.java @@ -138,14 +138,16 @@ public class RotatableCanvas extends JPanel implements MouseListener, repaint(); } + boolean first=true; public void setPoints(Vector points, int npoint) { this.points = points; this.npoint = npoint; + if (first) { ToolTipManager.sharedInstance().registerComponent(this); ToolTipManager.sharedInstance().setInitialDelay(0); ToolTipManager.sharedInstance().setDismissDelay(10000); - + } prefsize = getPreferredSize(); orig = new float[npoint][3]; @@ -186,11 +188,13 @@ public class RotatableCanvas extends JPanel implements MouseListener, findWidth(); scale = findScale(); - + if (first) { + addMouseListener(this); addMouseMotionListener(this); - + } + first=false; } public void initAxes() diff --git a/src/jalview/jbgui/GPCAPanel.java b/src/jalview/jbgui/GPCAPanel.java index 62234bf..72cb5cd 100755 --- a/src/jalview/jbgui/GPCAPanel.java +++ b/src/jalview/jbgui/GPCAPanel.java @@ -71,6 +71,9 @@ public class GPCAPanel extends JInternalFrame protected JMenu associateViewsMenu = new JMenu(); + protected JMenu calcSettings=new JMenu(); + protected JCheckBoxMenuItem nuclSetting=new JCheckBoxMenuItem(); + protected JCheckBoxMenuItem protSetting=new JCheckBoxMenuItem(); public GPCAPanel() { try @@ -218,6 +221,30 @@ public class GPCAPanel extends JInternalFrame } }); associateViewsMenu.setText("Associate Nodes With"); + calcSettings.setText("Change Parameters"); + nuclSetting.setText("Nucleotide matrix"); + protSetting.setText("Protein matrix"); + nuclSetting.addActionListener(new ActionListener() + { + + @Override + public void actionPerformed(ActionEvent arg0) + { + nuclSetting_actionPerfomed(arg0); + } + }); + protSetting.addActionListener(new ActionListener() + { + + @Override + public void actionPerformed(ActionEvent arg0) + { + protSetting_actionPerfomed(arg0); + } + });calcSettings.add(nuclSetting); + calcSettings.add(protSetting); + + this.getContentPane().add(jPanel2, BorderLayout.SOUTH); jPanel2.add(jLabel1, null); jPanel2.add(xCombobox, null); @@ -227,6 +254,7 @@ public class GPCAPanel extends JInternalFrame jPanel2.add(zCombobox, null); jMenuBar1.add(fileMenu); jMenuBar1.add(viewMenu); + jMenuBar1.add(calcSettings); fileMenu.add(saveMenu); fileMenu.add(outputValues); fileMenu.add(print); @@ -240,6 +268,18 @@ public class GPCAPanel extends JInternalFrame viewMenu.add(associateViewsMenu); } + protected void protSetting_actionPerfomed(ActionEvent arg0) + { + // TODO Auto-generated method stub + + } + + protected void nuclSetting_actionPerfomed(ActionEvent arg0) + { + // TODO Auto-generated method stub + + } + protected void outputPoints_actionPerformed(ActionEvent e) { // TODO Auto-generated method stub diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 4c8394f..98b7101 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -97,6 +97,15 @@ public class ResidueProperties // extend subt. matrices } + /** + * maximum (gap) index for matrices involving protein alphabet + */ + public final static int maxProteinIndex=23; + /** + * maximum (gap) index for matrices involving nucleotide alphabet + */ + public final static int maxNucleotideIndex=10; + static { nucleotideIndex = new int[255]; @@ -572,21 +581,27 @@ public class ResidueProperties * new Color(60, 136, 238), // U Color.white, // I Color.white, // X * Color.white, // R Color.white, // Y Color.white, // N Color.white, // Gap */ + + // JBPNote: patch matrix for T/U equivalence when working with DNA or RNA. + // Will equate sequences if working with mixed nucleotide sets. + // treats T and U identically. R and Y weak equivalence with AG and CTU. + // N matches any other base weakly + // static final int[][] DNA = { - { 10, -8, -8, -8, 1, 0, 0, 0, 0, 0, 1 }, // C - { -8, 10, -8, -8, 1, 0, 0, 0, 0, 0, 1 }, // T - { -8, -8, 10, -8, 1, 0, 0, 0, 0, 0, 1 }, // A - { -8, -8, -8, 10, 1, 0, 0, 0, 0, 0, 1 }, // G - { 1, 1, 1, 1, 10, 0, 0, 0, 0, 0, 1 }, // - - { 1, 1, 1, 1, 1, 10, 0, 0, 0, 0, 1 }, // - - { 1, 1, 1, 1, 1, 0, 10, 0, 0, 0, 1 }, // - - { 1, 1, 1, 1, 1, 0, 0, 10, 0, 0, 1 }, // - - { 1, 1, 1, 1, 1, 0, 0, 0, 10, 0, 1 }, // - - { 1, 1, 1, 1, 1, 0, 0, 0, 0, 10, 1 }, // - - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // - + { 10, -8, -8, -8, -8, 1, 1, -8, 1, 1, 1 }, // C + { -8, 10, -8, -8, 10, 1, 1, -8, 1, 1, 1 }, // T + { -8, -8, 10, -8, -8, 1, 1, 1, -8, 1, 1 }, // A + { -8, -8, -8, 10, -8, 1, 1, 1, -8, 1, 1 }, // G + { -8, 10, -8, -8, 10, 1, 1, -8, 1, 1, 1 }, // U + { 1, 1, 1, 1, 1, 10, 0, 0, 0, 1, 1 }, // I + { 1, 1, 1, 1, 1, 0, 10, 0, 0, 1, 1 }, // X + { -8, -8, 1, 1, -8, 0, 0, 10, 0, 1, 1 }, // R + { 1, 1, -8, -8, 1, 0, 0, 0, 10, 1, 1 }, // Y + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1 }, // N + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // - }; - /** +/** * register matrices in list */ static @@ -594,6 +609,7 @@ public class ResidueProperties scoreMatrices.put("BLOSUM62", new ScoreMatrix("BLOSUM62", BLOSUM62, 0)); scoreMatrices.put("PAM250", new ScoreMatrix("PAM250", PAM250, 0)); scoreMatrices.put("DNA", new ScoreMatrix("DNA", DNA, 1)); + } public static final Color[] pidColours =