From 27b77d2219147d3741d4af7377e13918a8ae972a Mon Sep 17 00:00:00 2001 From: Jim Procter Date: Tue, 15 Apr 2014 16:09:20 +0100 Subject: [PATCH] JAL-1473 refactor score matrices and tree score calculations to interface/api and base implementations --- src/jalview/analysis/NJTree.java | 97 +++----------------- src/jalview/analysis/PCA.java | 18 +++- .../analysis/scoremodels/PIDScoreModel.java | 55 +++++++++++ .../scoremodels/PairwiseSeqScoreModel.java | 61 ++++++++++++ src/jalview/analysis/scoremodels/SWScoreModel.java | 54 +++++++++++ src/jalview/api/analysis/ScoreModelI.java | 16 ++++ src/jalview/datamodel/AlignmentView.java | 11 +++ src/jalview/schemes/ResidueProperties.java | 22 ++++- src/jalview/schemes/ScoreMatrix.java | 23 ++++- test/jalview/schemes/ScoreMatrixPrinter.java | 14 ++- 10 files changed, 272 insertions(+), 99 deletions(-) create mode 100644 src/jalview/analysis/scoremodels/PIDScoreModel.java create mode 100644 src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java create mode 100644 src/jalview/analysis/scoremodels/SWScoreModel.java create mode 100644 src/jalview/api/analysis/ScoreModelI.java diff --git a/src/jalview/analysis/NJTree.java b/src/jalview/analysis/NJTree.java index 944354f..41d599e 100644 --- a/src/jalview/analysis/NJTree.java +++ b/src/jalview/analysis/NJTree.java @@ -20,6 +20,7 @@ package jalview.analysis; import java.util.*; +import jalview.api.analysis.ScoreModelI; import jalview.datamodel.*; import jalview.io.*; import jalview.schemes.*; @@ -254,8 +255,7 @@ public class NJTree noseqs = i++; - distance = findDistances(this.seqData - .getSequenceStrings(Comparison.GapChars.charAt(0))); + distance = findDistances(); // System.err.println("Made distances");// dbg makeLeaves(); // System.err.println("Made leaves");// dbg @@ -716,100 +716,25 @@ public class NJTree } /** - * DOCUMENT ME! + * Calculate a distance matrix given the sequence input data and score model * - * @return DOCUMENT ME! 
+ * @return distance matrix used to compute tree */ - public float[][] findDistances(String[] sequenceString) + public float[][] findDistances() { + float[][] distance = new float[noseqs][noseqs]; - if (pwtype.equals("PID")) - { - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - if (j == i) - { - distance[i][i] = 0; - } - else - { - distance[i][j] = 100 - Comparison.PID(sequenceString[i], - sequenceString[j]); - - distance[j][i] = distance[i][j]; - } - } - } - } - else - { // Pairwise substitution score (with no gap penalties) - ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype); - if (pwmatrix == null) + ScoreModelI _pwmatrix = ResidueProperties.getScoreModel(pwtype); + if (_pwmatrix == null) { - pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); + _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); } - int maxscore = 0; - int end = sequenceString[0].length(); - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - int score = 0; - - for (int k = 0; k < end; k++) - { - try - { - score += pwmatrix.getPairwiseScore( - sequenceString[i].charAt(k), - sequenceString[j].charAt(k)); - } catch (Exception ex) - { - System.err.println("err creating BLOSUM62 tree"); - ex.printStackTrace(); - } - } - - distance[i][j] = (float) score; - - if (score > maxscore) - { - maxscore = score; - } - } - } - - for (int i = 0; i < (noseqs - 1); i++) - { - for (int j = i; j < noseqs; j++) - { - distance[i][j] = (float) maxscore - distance[i][j]; - distance[j][i] = distance[i][j]; - } - } - - } + distance = _pwmatrix.findDistances(seqData); return distance; - // else - /* - * else if (pwtype.equals("SW")) { float max = -1; - * - * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) - * { AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep"); - * as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out); - * distance[i][j] = (float) as.maxscore; - * - * if 
(max < distance[i][j]) { max = distance[i][j]; } } } - * - * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) - * { distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j]; - * } } }/ - */ + } /** diff --git a/src/jalview/analysis/PCA.java b/src/jalview/analysis/PCA.java index 89c6353..979968f 100755 --- a/src/jalview/analysis/PCA.java +++ b/src/jalview/analysis/PCA.java @@ -70,6 +70,10 @@ public class PCA implements Runnable */ public PCA(String[] s, boolean nucleotides) { + this(s, nucleotides, null); + } + public PCA(String[] s, boolean nucleotides, String s_m) + { BinarySequence[] bs = new BinarySequence[s.length]; int ii = 0; @@ -83,9 +87,17 @@ public class PCA implements Runnable BinarySequence[] bs2 = new BinarySequence[s.length]; ii = 0; - - String sm = nucleotides ? "DNA" : "BLOSUM62"; - ScoreMatrix smtrx = ResidueProperties.getScoreMatrix(sm); + ScoreMatrix smtrx = null; + String sm=s_m; + if (sm!=null) + { + smtrx = ResidueProperties.getScoreMatrix(sm); + } + if (smtrx==null) + { + // either we were given a non-existent score matrix or a scoremodel that isn't based on a pairwise symbol score matrix + smtrx = ResidueProperties.getScoreMatrix(sm=(nucleotides ? 
"DNA" : "BLOSUM62")); + } details.append("PCA calculation using " + sm + " sequence similarity matrix\n========\n\n"); while ((ii < s.length) && (s[ii] != null)) diff --git a/src/jalview/analysis/scoremodels/PIDScoreModel.java b/src/jalview/analysis/scoremodels/PIDScoreModel.java new file mode 100644 index 0000000..2069b50 --- /dev/null +++ b/src/jalview/analysis/scoremodels/PIDScoreModel.java @@ -0,0 +1,55 @@ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.ScoreModelI; +import jalview.datamodel.AlignmentView; +import jalview.util.Comparison; + +public class PIDScoreModel implements ScoreModelI +{ + + @Override + public float[][] findDistances(AlignmentView seqData) + { + String[] sequenceString = seqData + .getSequenceStrings(Comparison.GapChars.charAt(0)); + int noseqs = sequenceString.length; + float[][] distance = new float[noseqs][noseqs]; + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) + { + if (j == i) + { + distance[i][i] = 0; + } + else + { + distance[i][j] = 100 - Comparison.PID(sequenceString[i], + sequenceString[j]); + + distance[j][i] = distance[i][j]; + } + } + } + return distance; + } + + @Override + public String getName() + { + return "PID"; + } + + @Override + public boolean isDNA() + { + return true; + } + + @Override + public boolean isProtein() + { + return true; + } + +} diff --git a/src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java b/src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java new file mode 100644 index 0000000..78c5f17 --- /dev/null +++ b/src/jalview/analysis/scoremodels/PairwiseSeqScoreModel.java @@ -0,0 +1,61 @@ +package jalview.analysis.scoremodels; + +import jalview.api.analysis.ScoreModelI; +import jalview.datamodel.AlignmentView; +import jalview.schemes.ScoreMatrix; +import jalview.util.Comparison; + +public abstract class PairwiseSeqScoreModel implements ScoreModelI +{ + abstract public int getPairwiseScore(char c, char d); + + public float[][] 
findDistances(AlignmentView seqData) + { + String[] sequenceString = seqData + .getSequenceStrings(Comparison.GapChars.charAt(0)); + int noseqs = sequenceString.length; + float[][] distance = new float[noseqs][noseqs]; + + int maxscore = 0; + int end = sequenceString[0].length(); + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) + { + int score = 0; + + for (int k = 0; k < end; k++) + { + try + { + score += getPairwiseScore(sequenceString[i].charAt(k), + sequenceString[j].charAt(k)); + } catch (Exception ex) + { + System.err.println("err creating " + getName() + " tree"); + ex.printStackTrace(); + } + } + + distance[i][j] = (float) score; + + if (score > maxscore) + { + maxscore = score; + } + } + } + + for (int i = 0; i < (noseqs - 1); i++) + { + for (int j = i; j < noseqs; j++) + { + distance[i][j] = (float) maxscore - distance[i][j]; + distance[j][i] = distance[i][j]; + } + } + return distance; + } + + abstract public int[][] getMatrix(); +} \ No newline at end of file diff --git a/src/jalview/analysis/scoremodels/SWScoreModel.java b/src/jalview/analysis/scoremodels/SWScoreModel.java new file mode 100644 index 0000000..d8c6230 --- /dev/null +++ b/src/jalview/analysis/scoremodels/SWScoreModel.java @@ -0,0 +1,54 @@ +package jalview.analysis.scoremodels; + +import jalview.analysis.AlignSeq; +import jalview.api.analysis.ScoreModelI; +import jalview.datamodel.AlignmentView; +import jalview.datamodel.SequenceI; +import jalview.util.Comparison; + +public class SWScoreModel implements ScoreModelI +{ + + @Override + public float[][] findDistances(AlignmentView seqData) + { + SequenceI[] sequenceString = seqData + .getVisibleAlignment(Comparison.GapChars.charAt(0)).getSequencesArray(); + int noseqs = sequenceString.length; + float[][] distance = new float[noseqs][noseqs]; + + float max = -1; + + for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) + { AlignSeq as = new AlignSeq(sequenceString[i], sequenceString[j], 
seqData.isNa() ? "dna" : "pep"); + as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out); + distance[i][j] = (float) as.maxscore; + + if (max < distance[i][j]) { max = distance[i][j]; } } } + + for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++) + { distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j]; + } } + + return distance; + } + + @Override + public String getName() + { + return "Smith Waterman Score"; + } + @Override + public boolean isDNA() + { + return true; + } + @Override + public boolean isProtein() + { + return true; + } + public String toString() { + return "Score between two sequences aligned with Smith Waterman with default Peptide/Nucleotide matrix"; + } +} diff --git a/src/jalview/api/analysis/ScoreModelI.java b/src/jalview/api/analysis/ScoreModelI.java new file mode 100644 index 0000000..0d56033 --- /dev/null +++ b/src/jalview/api/analysis/ScoreModelI.java @@ -0,0 +1,16 @@ +package jalview.api.analysis; + +import jalview.datamodel.AlignmentView; + +public interface ScoreModelI +{ + + float[][] findDistances(AlignmentView seqData); + + String getName(); + + boolean isDNA(); + + boolean isProtein(); + +} diff --git a/src/jalview/datamodel/AlignmentView.java b/src/jalview/datamodel/AlignmentView.java index ea0fbe0..273a685 100644 --- a/src/jalview/datamodel/AlignmentView.java +++ b/src/jalview/datamodel/AlignmentView.java @@ -46,6 +46,16 @@ public class AlignmentView */ private Vector scGroups; + private boolean isNa=false; + /** + * Reports whether the view concerns nucleotide sequences. + * @return true for a nucleotide view, false if the view concerns peptides + */ + public boolean isNa() + { + return isNa; + } + /** * Group defined over SeqCigars. Unlike AlignmentI associated groups, each * SequenceGroup hold just the essential properties for the group, but no @@ -99,6 +109,7 @@ public class AlignmentView (selectedRegionOnly ? selection : null)), (selectedRegionOnly && selection != null) ? 
selection .getStartRes() : 0); + isNa = alignment.isNucleotide(); // walk down SeqCigar array and Alignment Array - optionally restricted by // selected region. // test group membership for each sequence in each group, store membership diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 9acfc24..98be0c8 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -18,14 +18,16 @@ */ package jalview.schemes; +import jalview.analysis.scoremodels.PIDScoreModel; +import jalview.api.analysis.ScoreModelI; + import java.util.*; import java.util.List; - import java.awt.*; public class ResidueProperties { - public static Hashtable scoreMatrices = new Hashtable(); + public static Hashtable scoreMatrices = new Hashtable(); // Stores residue codes/names and colours and other things public static final int[] aaIndex; // aaHash version 2.1.1 and below @@ -1416,6 +1418,10 @@ public class ResidueProperties propHash.put("proline", proline); propHash.put("polar", polar); } + static + { + scoreMatrices.put("PID", new PIDScoreModel()); + } private ResidueProperties() { @@ -1540,12 +1546,22 @@ public class ResidueProperties public static ScoreMatrix getScoreMatrix(String pwtype) { Object val = scoreMatrices.get(pwtype); - if (val != null) + if (val != null && val instanceof ScoreMatrix) { return (ScoreMatrix) val; } return null; } + /** + * get a ScoreModel based on its string name + * + * @param pwtype + * @return scoremodel of type pwtype or null + */ + public static ScoreModelI getScoreModel(String pwtype) + { + return scoreMatrices.get(pwtype); + } public static int getPAM250(char c, char d) { diff --git a/src/jalview/schemes/ScoreMatrix.java b/src/jalview/schemes/ScoreMatrix.java index e78b92c..ab603e1 100644 --- a/src/jalview/schemes/ScoreMatrix.java +++ b/src/jalview/schemes/ScoreMatrix.java @@ -18,9 +18,18 @@ */ package jalview.schemes; -public class ScoreMatrix +import 
jalview.analysis.scoremodels.PairwiseSeqScoreModel; +import jalview.api.analysis.ScoreModelI; + +public class ScoreMatrix extends PairwiseSeqScoreModel implements ScoreModelI { String name; + + @Override + public String getName() + { + return name; + } /** * reference to integer score matrix @@ -31,23 +40,31 @@ public class ScoreMatrix * 0 for Protein Score matrix. 1 for dna score matrix */ int type; - + /** + * + * @param name Unique, human readable name for the matrix + * @param matrix Pairwise scores indexed according to appropriate symbol alphabet + * @param type 0 for Protein, 1 for NA + */ ScoreMatrix(String name, int[][] matrix, int type) { this.matrix = matrix; this.type = type; + this.name = name; } + @Override public boolean isDNA() { return type == 1; } - + @Override public boolean isProtein() { return type == 0; } + @Override public int[][] getMatrix() { return matrix; diff --git a/test/jalview/schemes/ScoreMatrixPrinter.java b/test/jalview/schemes/ScoreMatrixPrinter.java index a472951..2830918 100644 --- a/test/jalview/schemes/ScoreMatrixPrinter.java +++ b/test/jalview/schemes/ScoreMatrixPrinter.java @@ -18,6 +18,8 @@ */ package jalview.schemes; +import jalview.api.analysis.ScoreModelI; + import java.util.Map; import org.junit.Test; @@ -28,7 +30,7 @@ public class ScoreMatrixPrinter @Test public void printAllMatrices() { - for (Map.Entry sm:((Map) ResidueProperties.scoreMatrices).entrySet()) + for (Map.Entry sm: ResidueProperties.scoreMatrices.entrySet()) { System.out.println("Matrix "+sm.getKey()); System.out.println(sm.getValue().toString()); @@ -37,10 +39,14 @@ public class ScoreMatrixPrinter @Test public void printHTMLMatrices() { - for (Map.Entry sm:((Map) ResidueProperties.scoreMatrices).entrySet()) + for (Map.Entry _sm: ResidueProperties.scoreMatrices.entrySet()) { - System.out.println("Matrix "+sm.getKey()); - System.out.println(sm.getValue().outputMatrix(true)); + if (_sm.getValue() instanceof ScoreMatrix) + { + ScoreMatrix sm = (ScoreMatrix) 
_sm.getValue(); + System.out.println("Matrix "+_sm.getKey()); + System.out.println(sm.outputMatrix(true)); + } } } -- 1.7.10.2