*/
package jalview.analysis;
-import jalview.analysis.scoremodels.PIDModel;
-import jalview.api.analysis.DistanceScoreModelI;
import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.SimilarityParamsI;
-import jalview.api.analysis.SimilarityScoreModelI;
import jalview.datamodel.AlignmentView;
import jalview.math.MatrixI;
// long now = System.currentTimeMillis();
try
{
- eigenvector = computeSimilarity();
+ eigenvector = scoreModel.findSimilarities(seqs, similarityParams);
details.append(" --- OrigT * Orig ---- \n");
eigenvector.print(ps, "%8.2f");
}
/**
- * Computes a pairwise similarity matrix for the given sequence regions using
- * the configured score model. If the score model is a similarity model, then
- * it computes the result directly. If it is a distance model, then use it to
- * compute pairwise distances, and convert these to similarity scores.
- *
- * @param av
- * @return
- */
- MatrixI computeSimilarity()
- {
- MatrixI result = null;
- if (scoreModel instanceof SimilarityScoreModelI)
- {
- result = ((SimilarityScoreModelI) scoreModel).findSimilarities(seqs,
- similarityParams);
- if (scoreModel instanceof PIDModel)
- {
- /*
- * scale score to width of alignment for backwards
- * compatibility with Jalview 2.10.1 SeqSpace PCA calculation
- */
- result.multiply(seqs.getWidth() / 100d);
- }
- }
- else if (scoreModel instanceof DistanceScoreModelI)
- {
- /*
- * find distances and convert to similarity scores
- * reverseRange(false) preserves but reverses the min-max range
- */
- result = ((DistanceScoreModelI) scoreModel).findDistances(seqs,
- similarityParams);
- result.reverseRange(false);
- }
- else
- {
- System.err
- .println("Unexpected type of score model, cannot calculate similarity");
- }
-
- return result;
- }
-
- /**
* Answers the N dimensions of the NxN PCA matrix. This is the number of
* sequences involved in the pairwise score calculation.
*
package jalview.analysis;
-import jalview.api.analysis.DistanceScoreModelI;
import jalview.api.analysis.ScoreModelI;
import jalview.api.analysis.SimilarityParamsI;
-import jalview.api.analysis.SimilarityScoreModelI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.CigarArray;
import jalview.datamodel.SeqCigar;
*/
protected void computeTree(ScoreModelI sm, SimilarityParamsI scoreOptions)
{
- if (sm instanceof DistanceScoreModelI)
- {
- distances = ((DistanceScoreModelI) sm).findDistances(seqData,
- scoreOptions);
- }
- else if (sm instanceof SimilarityScoreModelI)
- {
- /*
- * compute similarity and invert it to give a distance measure
- * reverseRange(true) converts maximum similarity to zero distance
- */
- MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities(
- seqData, scoreOptions);
- result.reverseRange(true);
- distances = result;
- }
+ distances = sm.findDistances(seqData, scoreOptions);
makeLeaves();
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.MatrixI;
+
+public abstract class DistanceScoreModel implements ScoreModelI
+{
+ /**
+ * A similarity score is calculated by first computing a distance score, and
+ * then reversing the min-max range of the score values. The matrix obtained
+ * from findDistances is the same object that has reverseRange(false) applied
+ * to it and is then returned; no copy is taken in this method.
+ *
+ * @param seqData
+ *          the sequence regions to score, passed unchanged to findDistances
+ * @param options
+ *          the scoring options, passed unchanged to findDistances
+ * @return the matrix returned by findDistances, after reverseRange(false)
+ *         has been called on it
+ */
+ @Override
+ public MatrixI findSimilarities(AlignmentView seqData,
+ SimilarityParamsI options)
+ {
+ MatrixI result = findDistances(seqData, options);
+
+ /*
+ * reverse the range of score values so that
+ * max becomes min and vice versa
+ */
+ result.reverseRange(false);
+
+ return result;
+ }
+}
import jalview.api.AlignmentViewPanel;
import jalview.api.FeatureRenderer;
-import jalview.api.analysis.DistanceScoreModelI;
import jalview.api.analysis.SimilarityParamsI;
import jalview.api.analysis.ViewBasedAnalysisI;
import jalview.datamodel.AlignmentView;
import java.util.Map;
import java.util.Set;
-public class FeatureDistanceModel implements DistanceScoreModelI,
+public class FeatureDistanceModel extends DistanceScoreModel implements
ViewBasedAnalysisI
{
private static final String NAME = "Sequence Feature Similarity";
import jalview.api.analysis.PairwiseScoreModelI;
import jalview.api.analysis.SimilarityParamsI;
-import jalview.api.analysis.SimilarityScoreModelI;
import jalview.datamodel.AlignmentView;
import jalview.math.Matrix;
import jalview.math.MatrixI;
/**
* A class to provide sequence pairwise similarity based on residue identity
*/
-public class PIDModel implements SimilarityScoreModelI,
+public class PIDModel extends SimilarityScoreModel implements
PairwiseScoreModelI
{
private static final String NAME = "PID";
return c;
}
+ /**
+ * Computes similarity scores based on pairwise percentage identity of
+ * sequences. For consistency with Jalview 2.10.1's SeqSpace mode PCA
+ * calculation, the percentage scores are rescaled to the width of the
+ * sequences (as if counts of identical residues).
+ * <p>
+ * The sequence strings are obtained with
+ * seqData.getSequenceStrings(Comparison.GAP_DASH) and scored by the
+ * String[] overload of findSimilarities; every value of that result is then
+ * multiplied by seqData.getWidth() / 100.
+ *
+ * @param seqData
+ *          the sequence regions to compare; also supplies the width used as
+ *          the rescaling factor
+ * @param options
+ *          the scoring options, passed unchanged to the String[] overload
+ * @return the pairwise matrix after rescaling by width / 100
+ */
@Override
public MatrixI findSimilarities(AlignmentView seqData,
SimilarityParamsI options)
{
String[] seqs = seqData.getSequenceStrings(Comparison.GAP_DASH);
- return findSimilarities(seqs, options);
+
+ MatrixI result = findSimilarities(seqs, options);
+
+ result.multiply(seqData.getWidth() / 100d);
+
+ return result;
+ }
+
+ /**
+ * A distance score is computed in the usual way (by reversing the range of
+ * the similarity score results), and then rescaled to percentage values
+ * (reversing the rescaling to count values done in findSimilarities).
+ * <p>
+ * The rescaling multiplies every value by 100 / seqData.getWidth(). It is
+ * skipped when the width is zero, since the factor would not be finite; in
+ * that case the result of super.findDistances is returned as is.
+ *
+ * @param seqData
+ *          the sequence regions to compare; also supplies the width used as
+ *          the rescaling factor
+ * @param options
+ *          the scoring options, passed unchanged to super.findDistances
+ * @return the matrix from super.findDistances, rescaled by 100 / width when
+ *         the width is non-zero
+ */
+ @Override
+ public MatrixI findDistances(AlignmentView seqData,
+ SimilarityParamsI options)
+ {
+ MatrixI result = super.findDistances(seqData, options);
+
+ if (seqData.getWidth() != 0)
+ {
+ result.multiply(100d / seqData.getWidth());
+ }
+
+ return result;
}
/**
import jalview.api.analysis.PairwiseScoreModelI;
import jalview.api.analysis.SimilarityParamsI;
-import jalview.api.analysis.SimilarityScoreModelI;
import jalview.datamodel.AlignmentView;
import jalview.math.Matrix;
import jalview.math.MatrixI;
* A class that models a substitution score matrix for any given alphabet of
* symbols
*/
-public class ScoreMatrix implements SimilarityScoreModelI,
+public class ScoreMatrix extends SimilarityScoreModel implements
PairwiseScoreModelI
{
private static final char GAP_CHARACTER = Comparison.GAP_DASH;
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.api.analysis.SimilarityParamsI;
+import jalview.datamodel.AlignmentView;
+import jalview.math.MatrixI;
+
+public abstract class SimilarityScoreModel implements ScoreModelI
+{
+
+ /**
+ * Computed similarity scores are converted to distance scores by subtracting
+ * every value from the maximum value. That is, maximum similarity corresponds
+ * to zero distance, and smaller similarities to larger distances. The matrix
+ * obtained from findSimilarities is the same object that has
+ * reverseRange(true) applied to it and is then returned; no copy is taken in
+ * this method.
+ *
+ * @param seqData
+ *          the sequence regions to score, passed unchanged to
+ *          findSimilarities
+ * @param options
+ *          the scoring options, passed unchanged to findSimilarities
+ * @return the matrix returned by findSimilarities, after reverseRange(true)
+ *         has been called on it
+ */
+ @Override
+ public MatrixI findDistances(AlignmentView seqData,
+ SimilarityParamsI options)
+ {
+ MatrixI result = findSimilarities(seqData, options);
+
+ /*
+ * reverse the range of the scores mapping the maximum to zero
+ */
+ result.reverseRange(true);
+
+ return result;
+ }
+
+}
import jalview.analysis.AlignSeq;
import jalview.api.analysis.SimilarityParamsI;
-import jalview.api.analysis.SimilarityScoreModelI;
import jalview.datamodel.AlignmentView;
import jalview.datamodel.SequenceI;
import jalview.math.Matrix;
* A class that computes pairwise similarity scores using the Smith-Waterman
* alignment algorithm
*/
-public class SmithWatermanModel implements SimilarityScoreModelI
+public class SmithWatermanModel extends SimilarityScoreModel
{
private static final String NAME = "Smith Waterman Score";
+++ /dev/null
-package jalview.api.analysis;
-
-import jalview.datamodel.AlignmentView;
-import jalview.math.MatrixI;
-
-/**
- * A sequence distance score models, that provides a method to compute distances
- * between pairs of sequences. The options parameter provides configuration
- * choices for how the similarity score is calculated.
- *
- * @author gmcarstairs
- *
- */
-public interface DistanceScoreModelI extends ScoreModelI
-{
- /**
- * Returns a distance score for the given sequence regions, that is, a matrix
- * whose value [i][j] is the distance of sequence i from sequence j by some
- * measure
- *
- * @param seqData
- * @param options
- * @return
- */
- MatrixI findDistances(AlignmentView seqData, SimilarityParamsI options);
-}
package jalview.api.analysis;
+import jalview.datamodel.AlignmentView;
+import jalview.math.MatrixI;
+
public interface ScoreModelI
{
/**
boolean isProtein();
// TODO getName, isDNA, isProtein can be static methods in Java 8
+
+ /**
+ * Returns a distance score for the given sequence regions, that is, a matrix
+ * whose value [i][j] is the distance of sequence i from sequence j by some
+ * measure. The options parameter provides configuration choices for how the
+ * distance score is calculated.
+ *
+ * @param seqData
+ * @param options
+ * @return
+ */
+
+ MatrixI findDistances(AlignmentView seqData, SimilarityParamsI options);
+
+ /**
+ * Returns a similarity score for the given sequence regions, that is, a
+ * matrix whose value [i][j] is the similarity of sequence i to sequence j by
+ * some measure. The options parameter provides configuration choices for how
+ * the similarity score is calculated.
+ *
+ * @param seqData
+ * @param options
+ * @return
+ */
+ MatrixI findSimilarities(AlignmentView seqData, SimilarityParamsI options);
}
+++ /dev/null
-package jalview.api.analysis;
-
-import jalview.datamodel.AlignmentView;
-import jalview.math.MatrixI;
-
-/**
- * A class that provides a method to compute similarity scores of pairs of
- * sequences
- *
- * @author gmcarstairs
- *
- */
-public interface SimilarityScoreModelI extends ScoreModelI
-{
- /**
- * Returns a similarity score for the given sequence regions, that is, a
- * matrix whose value [i][j] is the similarity of sequence i to sequence j by
- * some measure. The options parameter provides configuration choices for how
- * the similarity score is calculated.
- *
- * @param seqData
- * @param options
- * @return
- */
- MatrixI findSimilarities(AlignmentView seqData, SimilarityParamsI options);
-}
+++ /dev/null
-package jalview.analysis;
-
-import static org.testng.Assert.assertNotNull;
-import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
-
-import jalview.analysis.scoremodels.FeatureDistanceModel;
-import jalview.analysis.scoremodels.PIDModel;
-import jalview.analysis.scoremodels.ScoreModels;
-import jalview.analysis.scoremodels.SimilarityParams;
-import jalview.api.analysis.ScoreModelI;
-import jalview.api.analysis.SimilarityParamsI;
-import jalview.api.analysis.ViewBasedAnalysisI;
-import jalview.datamodel.AlignmentView;
-import jalview.datamodel.SequenceFeature;
-import jalview.datamodel.SequenceI;
-import jalview.gui.AlignFrame;
-import jalview.io.DataSourceType;
-import jalview.io.FileLoader;
-import jalview.math.MatrixI;
-
-import org.testng.annotations.BeforeTest;
-import org.testng.annotations.Test;
-
-public class PCATest
-{
-
- private static final String TESTSEQS = ">s1\nAFRK\n>s2\nAFSS\n>s3\nAFTL\n>s4\nARSL\n";
- private AlignFrame af;
-
- @Test(groups = "Functional")
- public void testComputeSimilarity_blosum62()
- {
- setUp();
- SimilarityParamsI params = new SimilarityParams(true, false, true,
- false);
- AlignmentView view = af.getViewport().getAlignmentView(false);
- ScoreModelI blosum62 = ScoreModels.getInstance().getBlosum62();
- PCA pca = new PCA(view, blosum62, params);
-
- MatrixI result = pca.computeSimilarity();
- assertNotNull(result);
-
- /*
- * AFRK^AFRK = 4+6+5+5 = 20
- * AFRK^AFSS = 4+6+-1+0 = 9
- * AFRK^AFTL = 4+6+-1+-2 = 7
- * AFRK^ARSL = 4+-3+-1+-2 = -2
- */
- assertArrayEquals(result.getRow(0), new double[] { 20, 9, 7, -2 },
- 0.00001d);
- }
-
- @BeforeTest(alwaysRun = true)
- public void setUp()
- {
- af = new FileLoader().LoadFileWaitTillLoaded(TESTSEQS,
- DataSourceType.PASTE);
- }
-
- @Test(groups = "Functional")
- public void testComputeSimilarity_PID()
- {
- setUp();
- SimilarityParamsI params = new SimilarityParams(true, false, true,
- false);
- AlignmentView view = af.getViewport().getAlignmentView(false);
- ScoreModelI pid = new PIDModel();
- PCA pca = new PCA(view, pid, params);
-
- MatrixI result = pca.computeSimilarity();
- assertNotNull(result);
-
- /*
- * AFRK^AFRK = 4 scaled to width
- * AFRK^AFSS = 2
- * AFRK^AFTL = 2
- * AFRK^ARSL = 1
- */
- assertArrayEquals(new double[] { 4d, 2d, 2d, 1d },
- result.getRow(0), 0.00001d);
- }
-
- @Test(groups = "Functional")
- public void testComputeSimilarity_featureDistances()
- {
- setUp();
- SimilarityParamsI params = new SimilarityParams(true, false, true,
- false);
- AlignmentView view = af.getViewport().getAlignmentView(false);
- ScoreModelI featureModel = new FeatureDistanceModel();
- PCA pca = new PCA(view, featureModel, params);
-
- MatrixI result = pca.computeSimilarity();
-
- /*
- * no features = no scores!
- */
- assertArrayEquals(new double[] { 0d, 0d, 0d, 0d },
- result.getRow(0), 0.00001d);
-
- SequenceI[] seqs = af.getViewport().getAlignment().getSequencesArray();
- seqs[0].addSequenceFeature(new SequenceFeature("Cath", "", 1, 4, 0f,
- null));
- seqs[1].addSequenceFeature(new SequenceFeature("Cath", "", 1, 4, 0f,
- null));
- seqs[2].addSequenceFeature(new SequenceFeature("Pfam", "", 1, 4, 0f,
- null));
- seqs[3].addSequenceFeature(new SequenceFeature("Pfam", "", 2, 3, 0f,
- null));
-
- af.getFeatureRenderer().findAllFeatures(true);
- ((ViewBasedAnalysisI) featureModel)
- .configureFromAlignmentView(af.alignPanel);
-
- /*
- * feature distance scores are (average number of features not shared):
- * diagonal: 0
- * seq1^seq2 0
- * seq1^seq3 8 / 4 = 2
- * seq1^seq4 6 / 4 = 1.5
- * seq2^seq3 8 / 4 = 2
- * seq2^seq4 6 / 3 = 1.5
- * seq3^seq4 2 / 4 = 0.5
- * so
- * { 0, 0, 2, 1.5
- * 0, 0, 2, 1.5
- * 2, 2, 0, 0.5
- * 1.5, 1.5, 0.5, 0
- * }
- * subtract each value from the max value to get similarity scores
- */
- result = pca.computeSimilarity();
- // assertArrayEquals(new double[] { 2d, 2d, 0d, 0.5d }, result.getRow(0),
- // 0.00001d);
- // assertArrayEquals(new double[] { 2d, 2d, 0d, 0.5d }, result.getRow(1),
- // 0.00001d);
- // assertArrayEquals(new double[] { 0d, 0d, 2d, 1.5d }, result.getRow(2),
- // 0.00001d);
- // assertArrayEquals(new double[] { 0.5d, 0.5d, 1.5d, 2d },
- // result.getRow(3), 0.00001d);
-
- /*
- * JAL-2424 bug means instead we get distance scores of
- * 8 / 5 = 1.6
- * 6 / 5 = 1.2
- * 2 / 5 = 0.4
- * so (until bug is fixed)
- * { 0, 0, 1.6, 1.2
- * 0, 0, 1.6, 1.2
- * 1.6, 1.6, 0, 0.4
- * 1.2, 1.2, 0.4, 0
- * }
- */
- assertArrayEquals(new double[] { 1.6d, 1.6d, 0d, 0.4d },
- result.getRow(0),
- 0.00001d);
- assertArrayEquals(new double[] { 1.6d, 1.6d, 0d, 0.4d },
- result.getRow(1),
- 0.00001d);
- assertArrayEquals(new double[] { 0d, 0d, 1.6d, 1.2d },
- result.getRow(2),
- 0.00001d);
- assertArrayEquals(new double[] { 0.4d, 0.4d, 1.2d, 1.6d },
- result.getRow(3), 0.00001d);
- }
-}
// these left to fail pending resolution of
// JAL-2424 (computing score as 5/6, should be 5/5)
- assertEquals(distances.getValue(0, 1), 1f);
+ // see also PCATest.testComputeSimilarity_featureDistances()
+ assertEquals(distances.getValue(0, 1), 1f, "JAL-2424!");
assertEquals(distances.getValue(1, 0), 1f);
}
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;
-import jalview.api.analysis.DistanceScoreModelI;
import jalview.api.analysis.PairwiseScoreModelI;
import jalview.api.analysis.ScoreModelI;
-import jalview.api.analysis.SimilarityScoreModelI;
import java.util.Iterator;
* Verify that the singleton constructor successfully loads Jalview's built-in
* score models
*/
- @Test
+ @Test(groups = "Functional")
public void testConstructor()
{
Iterator<ScoreModelI> models = ScoreModels.getInstance().getModels()
* models are served in order of addition
*/
ScoreModelI sm = models.next();
- assertTrue(sm instanceof SimilarityScoreModelI);
+ assertTrue(sm instanceof SimilarityScoreModel);
assertTrue(sm instanceof PairwiseScoreModelI);
- assertFalse(sm instanceof DistanceScoreModelI);
+ assertFalse(sm instanceof DistanceScoreModel);
assertEquals(sm.getName(), "BLOSUM62");
assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('I', 'R'), -3f);
sm = models.next();
- assertTrue(sm instanceof SimilarityScoreModelI);
+ assertTrue(sm instanceof SimilarityScoreModel);
assertTrue(sm instanceof PairwiseScoreModelI);
- assertFalse(sm instanceof DistanceScoreModelI);
+ assertFalse(sm instanceof DistanceScoreModel);
assertEquals(sm.getName(), "PAM250");
assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('R', 'C'), -4f);
sm = models.next();
- assertTrue(sm instanceof SimilarityScoreModelI);
+ assertTrue(sm instanceof SimilarityScoreModel);
assertTrue(sm instanceof PairwiseScoreModelI);
- assertFalse(sm instanceof DistanceScoreModelI);
+ assertFalse(sm instanceof DistanceScoreModel);
assertEquals(sm.getName(), "PID");
assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('R', 'C'), 0f);
assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('R', 'r'), 1f);
sm = models.next();
- assertTrue(sm instanceof SimilarityScoreModelI);
+ assertTrue(sm instanceof SimilarityScoreModel);
assertTrue(sm instanceof PairwiseScoreModelI);
- assertFalse(sm instanceof DistanceScoreModelI);
+ assertFalse(sm instanceof DistanceScoreModel);
assertEquals(sm.getName(), "DNA");
assertEquals(((PairwiseScoreModelI) sm).getPairwiseScore('c', 'x'), 1f);
sm = models.next();
- assertFalse(sm instanceof SimilarityScoreModelI);
+ assertFalse(sm instanceof SimilarityScoreModel);
assertFalse(sm instanceof PairwiseScoreModelI);
- assertTrue(sm instanceof DistanceScoreModelI);
+ assertTrue(sm instanceof DistanceScoreModel);
assertEquals(sm.getName(), "Sequence Feature Similarity");
}
* suite. It makes no assertions and is just provided as a utility method for
* printing out matrices. Relocated here from ScoreMatrixPrinter.
*/
- @Test
+ @Test(groups = "none")
public void printAllMatrices_tabDelimited()
{
printAllMatrices(false);
* suite. It makes no assertions and is just provided as a utility method for
* printing out matrices. Relocated here from ScoreMatrixPrinter.
*/
- @Test
+ @Test(groups = "none")
public void printAllMatrices_asHtml()
{
printAllMatrices(true);