From 8717834368bd00d8adfa47ee099288acd34363ef Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 24 Feb 2017 18:38:56 +0000 Subject: [PATCH] JAL-838 added 'SeqSpace' PID mode, added parameters to findDistances and findSimilarities (not yet acted on!) --- src/jalview/analysis/NJTree.java | 18 +++++++------ src/jalview/analysis/PCA.java | 8 ++++-- .../analysis/scoremodels/FeatureDistanceModel.java | 4 ++- .../analysis/scoremodels/PIDDistanceModel.java | 4 ++- src/jalview/analysis/scoremodels/PIDModel.java | 7 ++--- .../analysis/scoremodels/SWDistanceModel.java | 4 ++- src/jalview/analysis/scoremodels/ScoreMatrix.java | 10 +++++--- .../analysis/scoremodels/SimilarityParams.java | 27 ++++++++++++++++++++ src/jalview/api/analysis/DistanceScoreModelI.java | 6 +++-- .../api/analysis/SimilarityScoreModelI.java | 6 +++-- .../scoremodels/FeatureDistanceModelTest.java | 20 +++++++-------- .../analysis/scoremodels/ScoreMatrixTest.java | 2 +- 12 files changed, 82 insertions(+), 34 deletions(-) diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index 2c6b61d..fdadb13 100644 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -21,6 +21,7 @@ package jalview.analysis; import jalview.analysis.scoremodels.ScoreModels; +import jalview.analysis.scoremodels.SimilarityParams; import jalview.api.analysis.DistanceScoreModelI; import jalview.api.analysis.ScoreModelI; import jalview.api.analysis.SimilarityScoreModelI; @@ -231,6 +232,10 @@ public class NJTree { this.sequence = sqs; this.node = new Vector(); + if (!(treeType.equals(NEIGHBOUR_JOINING))) + { + treeType = AVERAGE_DISTANCE; + } this.type = treeType; this.pwtype = modelType; if (seqView != null) @@ -248,11 +253,6 @@ public class NJTree sdata.addOperation(CigarArray.M, end - start + 1); this.seqData = new AlignmentView(sdata, start); } - // System.err.println("Made seqData");// dbg - if (!(treeType.equals(NEIGHBOUR_JOINING))) - { - treeType = AVERAGE_DISTANCE; - } if (sm == 
null && !(modelType.equals("PID"))) { @@ -274,17 +274,19 @@ public class NJTree noseqs = i++; + // TODO pass choice of params from GUI in constructor if (sm instanceof DistanceScoreModelI) { - distance = ((DistanceScoreModelI) sm).findDistances(seqData); + distance = ((DistanceScoreModelI) sm).findDistances(seqData, + SimilarityParams.Jalview); } else if (sm instanceof SimilarityScoreModelI) { /* * compute similarity and invert it to give a distance measure */ - MatrixI result = ((SimilarityScoreModelI) sm) - .findSimilarities(seqData); + MatrixI result = ((SimilarityScoreModelI) sm).findSimilarities( + seqData, SimilarityParams.Jalview); result.reverseRange(true); distance = result; } diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 1797296..1f923b1 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -20,6 +20,7 @@ */ package jalview.analysis; +import jalview.analysis.scoremodels.SimilarityParams; import jalview.api.analysis.DistanceScoreModelI; import jalview.api.analysis.ScoreModelI; import jalview.api.analysis.SimilarityScoreModelI; @@ -228,13 +229,16 @@ public class PCA implements Runnable MatrixI computeSimilarity(AlignmentView av) { MatrixI result = null; + // TODO pass choice of params from GUI in constructor if (scoreModel instanceof SimilarityScoreModelI) { - result = ((SimilarityScoreModelI) scoreModel).findSimilarities(av); + result = ((SimilarityScoreModelI) scoreModel).findSimilarities(av, + SimilarityParams.SeqSpace); } else if (scoreModel instanceof DistanceScoreModelI) { - result = ((DistanceScoreModelI) scoreModel).findDistances(av); + result = ((DistanceScoreModelI) scoreModel).findDistances(av, + SimilarityParams.SeqSpace); result.reverseRange(false); } else diff --git a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java index 9245898..636c19b 100644 --- a/src/jalview/analysis/scoremodels/FeatureDistanceModel.java +++ 
b/src/jalview/analysis/scoremodels/FeatureDistanceModel.java @@ -23,6 +23,7 @@ package jalview.analysis.scoremodels; import jalview.api.AlignmentViewPanel; import jalview.api.FeatureRenderer; import jalview.api.analysis.DistanceScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.api.analysis.ViewBasedAnalysisI; import jalview.datamodel.AlignmentView; import jalview.datamodel.SeqCigar; @@ -58,7 +59,8 @@ public class FeatureDistanceModel implements DistanceScoreModelI, * of columns processed. */ @Override - public MatrixI findDistances(AlignmentView seqData) + public MatrixI findDistances(AlignmentView seqData, + SimilarityParamsI options) { List dft = fr.getDisplayedFeatureTypes(); SeqCigar[] seqs = seqData.getSequences(); diff --git a/src/jalview/analysis/scoremodels/PIDDistanceModel.java b/src/jalview/analysis/scoremodels/PIDDistanceModel.java index 154ff02..9688037 100644 --- a/src/jalview/analysis/scoremodels/PIDDistanceModel.java +++ b/src/jalview/analysis/scoremodels/PIDDistanceModel.java @@ -21,6 +21,7 @@ package jalview.analysis.scoremodels; import jalview.api.analysis.DistanceScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.datamodel.AlignmentView; import jalview.math.Matrix; import jalview.math.MatrixI; @@ -30,7 +31,8 @@ public class PIDDistanceModel implements DistanceScoreModelI { @Override - public MatrixI findDistances(AlignmentView seqData) + public MatrixI findDistances(AlignmentView seqData, + SimilarityParamsI options) { String[] sequenceString = seqData .getSequenceStrings(Comparison.GAP_SPACE); diff --git a/src/jalview/analysis/scoremodels/PIDModel.java b/src/jalview/analysis/scoremodels/PIDModel.java index 58667c0..aff1210 100644 --- a/src/jalview/analysis/scoremodels/PIDModel.java +++ b/src/jalview/analysis/scoremodels/PIDModel.java @@ -62,11 +62,11 @@ public class PIDModel implements SimilarityScoreModelI, } @Override - public MatrixI findSimilarities(AlignmentView seqData) + public MatrixI 
findSimilarities(AlignmentView seqData, + SimilarityParamsI options) { - // TODO reuse code in ScoreMatrix instead somehow String[] seqs = seqData.getSequenceStrings(' '); - return findSimilarities(seqs, SimilarityParams.Jalview); + return findSimilarities(seqs, options); } /** @@ -80,6 +80,7 @@ public class PIDModel implements SimilarityScoreModelI, protected MatrixI findSimilarities(String[] seqs, SimilarityParamsI options) { + // TODO reuse code in ScoreMatrix instead somehow double[][] values = new double[seqs.length][]; for (int row = 0; row < seqs.length; row++) { diff --git a/src/jalview/analysis/scoremodels/SWDistanceModel.java b/src/jalview/analysis/scoremodels/SWDistanceModel.java index 5e711db..b27cf26 100644 --- a/src/jalview/analysis/scoremodels/SWDistanceModel.java +++ b/src/jalview/analysis/scoremodels/SWDistanceModel.java @@ -22,6 +22,7 @@ package jalview.analysis.scoremodels; import jalview.analysis.AlignSeq; import jalview.api.analysis.DistanceScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.datamodel.AlignmentView; import jalview.datamodel.SequenceI; import jalview.math.Matrix; @@ -32,7 +33,8 @@ public class SWDistanceModel implements DistanceScoreModelI { @Override - public MatrixI findDistances(AlignmentView seqData) + public MatrixI findDistances(AlignmentView seqData, + SimilarityParamsI options) { SequenceI[] sequenceString = seqData.getVisibleAlignment( Comparison.GAP_SPACE).getSequencesArray(); diff --git a/src/jalview/analysis/scoremodels/ScoreMatrix.java b/src/jalview/analysis/scoremodels/ScoreMatrix.java index 7f71d0f..84835a4 100644 --- a/src/jalview/analysis/scoremodels/ScoreMatrix.java +++ b/src/jalview/analysis/scoremodels/ScoreMatrix.java @@ -21,6 +21,7 @@ package jalview.analysis.scoremodels; import jalview.api.analysis.PairwiseScoreModelI; +import jalview.api.analysis.SimilarityParamsI; import jalview.api.analysis.SimilarityScoreModelI; import jalview.datamodel.AlignmentView; import jalview.math.Matrix; 
@@ -345,19 +346,22 @@ public class ScoreMatrix implements SimilarityScoreModelI, * */ @Override - public MatrixI findSimilarities(AlignmentView seqstrings) + public MatrixI findSimilarities(AlignmentView seqstrings, + SimilarityParamsI options) { char gapChar = scoreGapAsAny ? (seqstrings.isNa() ? 'N' : 'X') : ' '; String[] seqs = seqstrings.getSequenceStrings(gapChar); - return findSimilarities(seqs); + return findSimilarities(seqs, options); } /** * @param seqs * @return */ - protected MatrixI findSimilarities(String[] seqs) + protected MatrixI findSimilarities(String[] seqs, + SimilarityParamsI options) { + // todo use options in calculation double[][] values = new double[seqs.length][]; for (int row = 0; row < seqs.length; row++) { diff --git a/src/jalview/analysis/scoremodels/SimilarityParams.java b/src/jalview/analysis/scoremodels/SimilarityParams.java index 8b6b7d0..4cc5b1f 100644 --- a/src/jalview/analysis/scoremodels/SimilarityParams.java +++ b/src/jalview/analysis/scoremodels/SimilarityParams.java @@ -4,18 +4,45 @@ import jalview.api.analysis.SimilarityParamsI; public class SimilarityParams implements SimilarityParamsI { + /** + * Based on Jalview's Comparison.PID method, which includes gaps and counts + * them as matching; it counts over the length of the shorter sequence + */ public static final SimilarityParamsI Jalview = new SimilarityParams( true, true, true, true); + /** + * 'SeqSpace' mode PCA calculation includes gaps but does not count them as + * matching; it uses the longest sequence length + */ + public static final SimilarityParamsI SeqSpace = new SimilarityParams( + true, false, true, true); + + /** + * as described in the Raghava-Barton paper; considers pairwise similarity + * only (excludes gap-gap) and does not match gaps + */ public static final SimilarityParamsI PID1 = new SimilarityParams(false, false, true, false); + /** + * as described in the Raghava-Barton paper; considers pairwise similarity + * only (excludes gap-gap) and does 
not match gaps + */ public static final SimilarityParamsI PID2 = new SimilarityParams(false, false, false, false); + /** + * as described in the Raghava-Barton paper; considers pairwise similarity + * only (excludes gap-gap) and does not match gaps + */ public static final SimilarityParamsI PID3 = new SimilarityParams(false, false, false, true); + /** + * as described in the Raghava-Barton paper; considers pairwise similarity + * only (excludes gap-gap) and does not match gaps + */ public static final SimilarityParamsI PID4 = new SimilarityParams(false, false, true, true); diff --git a/src/jalview/api/analysis/DistanceScoreModelI.java b/src/jalview/api/analysis/DistanceScoreModelI.java index 27003e8..0a39c0b 100644 --- a/src/jalview/api/analysis/DistanceScoreModelI.java +++ b/src/jalview/api/analysis/DistanceScoreModelI.java @@ -5,7 +5,8 @@ import jalview.math.MatrixI; /** * A sequence distance score models, that provides a method to compute distances - * between pairs of sequences + * between pairs of sequences. The options parameter provides configuration + * choices for how the similarity score is calculated. * * @author gmcarstairs * @@ -18,7 +19,8 @@ public interface DistanceScoreModelI extends ScoreModelI * measure * * @param seqData + * @param options * @return */ - MatrixI findDistances(AlignmentView seqData); + MatrixI findDistances(AlignmentView seqData, SimilarityParamsI options); } diff --git a/src/jalview/api/analysis/SimilarityScoreModelI.java b/src/jalview/api/analysis/SimilarityScoreModelI.java index 96208a3..0c2624f 100644 --- a/src/jalview/api/analysis/SimilarityScoreModelI.java +++ b/src/jalview/api/analysis/SimilarityScoreModelI.java @@ -15,10 +15,12 @@ public interface SimilarityScoreModelI extends ScoreModelI /** * Returns a similarity score for the given sequence regions, that is, a * matrix whose value [i][j] is the similarity of sequence i to sequence j by - * some measure + * some measure. 
The options parameter provides configuration choices for how + * the similarity score is calculated. * * @param seqData + * @param options * @return */ - MatrixI findSimilarities(AlignmentView seqData); + MatrixI findSimilarities(AlignmentView seqData, SimilarityParamsI options); } diff --git a/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java index cd6d7f5..1ebef8e 100644 --- a/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java +++ b/test/jalview/analysis/scoremodels/FeatureDistanceModelTest.java @@ -113,9 +113,9 @@ public class FeatureDistanceModelTest .getAlignPanel())); alf.selectAllSequenceMenuItem_actionPerformed(null); - MatrixI dm = fsm - .findDistances(alf.getViewport().getAlignmentView( - true)); + MatrixI dm = fsm.findDistances( + alf.getViewport().getAlignmentView(true), + SimilarityParams.Jalview); assertEquals(dm.getValue(0, 2), 0d, "FER1_MESCR (0) should be identical with RAPSA (2)"); assertTrue(dm.getValue(0, 1) > dm.getValue(0, 2), @@ -132,9 +132,9 @@ public class FeatureDistanceModelTest assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() .getAlignPanel())); alf.selectAllSequenceMenuItem_actionPerformed(null); - MatrixI dm = fsm - .findDistances(alf.getViewport().getAlignmentView( - true)); + MatrixI dm = fsm.findDistances( + alf.getViewport().getAlignmentView(true), + SimilarityParams.Jalview); assertEquals(dm.getValue(0, 2), 0d, "FER1_MESCR (0) should be identical with RAPSA (2)"); assertTrue(dm.getValue(0, 1) > dm.getValue(0, 2), @@ -152,9 +152,9 @@ public class FeatureDistanceModelTest assertTrue(fsm.configureFromAlignmentView(alf.getCurrentView() .getAlignPanel())); alf.selectAllSequenceMenuItem_actionPerformed(null); - MatrixI dm = fsm - .findDistances(alf.getViewport().getAlignmentView( - true)); + MatrixI dm = fsm.findDistances( + alf.getViewport().getAlignmentView(true), + SimilarityParams.Jalview); assertEquals( dm.getValue(0, 2), 0d, 
@@ -253,7 +253,7 @@ public class FeatureDistanceModelTest alf.selectAllSequenceMenuItem_actionPerformed(null); MatrixI distances = fsm.findDistances(alf.getViewport() - .getAlignmentView(true)); + .getAlignmentView(true), SimilarityParams.Jalview); assertEquals(distances.width(), 2); assertEquals(distances.height(), 2); assertEquals(distances.getValue(0, 0), 0d); diff --git a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java index 01de741..1076d43 100644 --- a/test/jalview/analysis/scoremodels/ScoreMatrixTest.java +++ b/test/jalview/analysis/scoremodels/ScoreMatrixTest.java @@ -171,7 +171,7 @@ public class ScoreMatrixTest String[] seqs = new String[] { "FKL", "R D", "QIA", "GWC" }; ScoreMatrix sm = ScoreModels.getInstance().getBlosum62(); - MatrixI pairwise = sm.findSimilarities(seqs); + MatrixI pairwise = sm.findSimilarities(seqs, SimilarityParams.Jalview); /* * should be NxN where N = number of sequences -- 1.7.10.2