import java.util.*;
+import jalview.api.analysis.ScoreModelI;
import jalview.datamodel.*;
import jalview.io.*;
import jalview.schemes.*;
noseqs = i++;
- distance = findDistances(this.seqData
- .getSequenceStrings(Comparison.GapChars.charAt(0)));
+ distance = findDistances();
// System.err.println("Made distances");// dbg
makeLeaves();
// System.err.println("Made leaves");// dbg
}
/**
- * DOCUMENT ME!
+ * Calculate a distance matrix given the sequence input data and score model
+ * The score model is looked up by name (pwtype); when no model is registered
+ * under that name the BLOSUM62 score matrix is used instead. The pairwise
+ * computation itself is delegated to the model's findDistances(seqData).
*
- * @return DOCUMENT ME!
+ * @return distance matrix produced by the selected score model, used to compute the tree
*/
- public float[][] findDistances(String[] sequenceString)
+ public float[][] findDistances()
{
+
float[][] distance = new float[noseqs][noseqs];
- if (pwtype.equals("PID"))
- {
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- if (j == i)
- {
- distance[i][i] = 0;
- }
- else
- {
- distance[i][j] = 100 - Comparison.PID(sequenceString[i],
- sequenceString[j]);
-
- distance[j][i] = distance[i][j];
- }
- }
- }
- }
- else
- {
// Pairwise substitution score (with no gap penalties)
- ScoreMatrix pwmatrix = ResidueProperties.getScoreMatrix(pwtype);
- if (pwmatrix == null)
+ ScoreModelI _pwmatrix = ResidueProperties.getScoreModel(pwtype);
+ if (_pwmatrix == null)
{
- pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62");
+ _pwmatrix = ResidueProperties.getScoreMatrix("BLOSUM62"); // fall back to BLOSUM62 when pwtype names no registered model
}
- int maxscore = 0;
- int end = sequenceString[0].length();
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- int score = 0;
-
- for (int k = 0; k < end; k++)
- {
- try
- {
- score += pwmatrix.getPairwiseScore(
- sequenceString[i].charAt(k),
- sequenceString[j].charAt(k));
- } catch (Exception ex)
- {
- System.err.println("err creating BLOSUM62 tree");
- ex.printStackTrace();
- }
- }
-
- distance[i][j] = (float) score;
-
- if (score > maxscore)
- {
- maxscore = score;
- }
- }
- }
-
- for (int i = 0; i < (noseqs - 1); i++)
- {
- for (int j = i; j < noseqs; j++)
- {
- distance[i][j] = (float) maxscore - distance[i][j];
- distance[j][i] = distance[i][j];
- }
- }
-
- }
+ distance = _pwmatrix.findDistances(seqData); // the model's result replaces the array allocated above
return distance;
- // else
- /*
- * else if (pwtype.equals("SW")) { float max = -1;
- *
- * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++)
- * { AlignSeq as = new AlignSeq(sequence[i], sequence[j], "pep");
- * as.calcScoreMatrix(); as.traceAlignment(); as.printAlignment(System.out);
- * distance[i][j] = (float) as.maxscore;
- *
- * if (max < distance[i][j]) { max = distance[i][j]; } } }
- *
- * for (int i = 0; i < (noseqs - 1); i++) { for (int j = i; j < noseqs; j++)
- * { distance[i][j] = max - distance[i][j]; distance[j][i] = distance[i][j];
- * } } }/
- */
+
}
/**
*/
public PCA(String[] s, boolean nucleotides)
{
+ this(s, nucleotides, null); // null score model name: the three-argument constructor then picks "DNA" or "BLOSUM62" from the nucleotides flag
+ }
+ public PCA(String[] s, boolean nucleotides, String s_m)
+ {
BinarySequence[] bs = new BinarySequence[s.length];
int ii = 0;
BinarySequence[] bs2 = new BinarySequence[s.length];
ii = 0;
-
- String sm = nucleotides ? "DNA" : "BLOSUM62";
- ScoreMatrix smtrx = ResidueProperties.getScoreMatrix(sm);
+ ScoreMatrix smtrx = null;
+ String sm=s_m;
+ if (sm!=null)
+ {
+ smtrx = ResidueProperties.getScoreMatrix(sm);
+ }
+ if (smtrx==null)
+ {
+ // either we were given a non-existent score matrix or a scoremodel that isn't based on a pairwise symbol score matrix
+ smtrx = ResidueProperties.getScoreMatrix(sm=(nucleotides ? "DNA" : "BLOSUM62"));
+ }
details.append("PCA calculation using " + sm
+ " sequence similarity matrix\n========\n\n");
while ((ii < s.length) && (s[ii] != null))
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.datamodel.AlignmentView;
+import jalview.util.Comparison;
+
+/**
+ * Score model registered under the name "PID": the distance between two
+ * sequences is 100 minus the value returned by Comparison.PID for the pair.
+ */
+public class PIDScoreModel implements ScoreModelI
+{
+
+  /**
+   * Build the symmetric PID distance matrix for the sequences in the view.
+   * 
+   * @param seqData
+   *          view supplying the sequence strings; gaps are rendered with the
+   *          first character of Comparison.GapChars
+   * @return square matrix with one row and column per sequence string; the
+   *         diagonal is 0
+   */
+  @Override
+  public float[][] findDistances(AlignmentView seqData)
+  {
+    final String[] seqs = seqData.getSequenceStrings(Comparison.GapChars
+            .charAt(0));
+    final int count = seqs.length;
+    final float[][] result = new float[count][count];
+
+    // diagonal entries keep the array's default value of 0, so only the
+    // pairs above the diagonal need computing; each is mirrored below it
+    for (int row = 0; row < count - 1; row++)
+    {
+      for (int col = row + 1; col < count; col++)
+      {
+        float dist = 100 - Comparison.PID(seqs[row], seqs[col]);
+        result[row][col] = dist;
+        result[col][row] = dist;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * @return the fixed model name "PID"
+   */
+  @Override
+  public String getName()
+  {
+    return "PID";
+  }
+
+  /**
+   * @return always true
+   */
+  @Override
+  public boolean isDNA()
+  {
+    return true;
+  }
+
+  /**
+   * @return always true
+   */
+  @Override
+  public boolean isProtein()
+  {
+    return true;
+  }
+
+}
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.api.analysis.ScoreModelI;
+import jalview.datamodel.AlignmentView;
+import jalview.schemes.ScoreMatrix;
+import jalview.util.Comparison;
+
+/**
+ * Base for score models that derive sequence distances by summing a pairwise
+ * symbol score over every aligned column.
+ */
+public abstract class PairwiseSeqScoreModel implements ScoreModelI
+{
+  /**
+   * Score for pairing symbol c with symbol d.
+   * 
+   * @param c
+   *          symbol taken from the first sequence
+   * @param d
+   *          symbol taken from the second sequence
+   * @return pairwise score for the two symbols
+   */
+  public abstract int getPairwiseScore(char c, char d);
+
+  /**
+   * Compute distances between all sequences in the view. For each pair the
+   * pairwise symbol scores are summed column by column; the sums are then
+   * turned into distances by subtracting them from the largest sum found
+   * (never less than 0, the starting value of the maximum).
+   * 
+   * @param seqData
+   *          view supplying the sequence strings; gaps are rendered with the
+   *          first character of Comparison.GapChars
+   * @return square, symmetric matrix with one row and column per sequence
+   *         string; an empty matrix when the view yields no sequences
+   */
+  @Override
+  public float[][] findDistances(AlignmentView seqData)
+  {
+    String[] sequenceString = seqData
+            .getSequenceStrings(Comparison.GapChars.charAt(0));
+    int noseqs = sequenceString.length;
+    float[][] distance = new float[noseqs][noseqs];
+    if (noseqs == 0)
+    {
+      // nothing to compare; reading sequenceString[0] below would throw
+      // ArrayIndexOutOfBoundsException
+      return distance;
+    }
+
+    int maxscore = 0;
+    // column count is taken from the first sequence. NOTE(review): assumes
+    // every string from the view has this length - a shorter one makes
+    // charAt throw, which is caught and reported once per column below
+    int end = sequenceString[0].length();
+    // the outer loop stops before the last sequence, so the last sequence's
+    // self score is never computed and distance[noseqs-1][noseqs-1] stays 0
+    for (int i = 0; i < (noseqs - 1); i++)
+    {
+      for (int j = i; j < noseqs; j++)
+      {
+        int score = 0;
+
+        for (int k = 0; k < end; k++)
+        {
+          try
+          {
+            score += getPairwiseScore(sequenceString[i].charAt(k),
+                    sequenceString[j].charAt(k));
+          } catch (Exception ex)
+          {
+            // best effort: a failing column is reported and skipped, the
+            // remaining columns still contribute to the score
+            System.err.println("err creating " + getName() + " tree");
+            ex.printStackTrace();
+          }
+        }
+
+        distance[i][j] = (float) score;
+
+        if (score > maxscore)
+        {
+          maxscore = score;
+        }
+      }
+    }
+
+    // convert scores to distances and mirror them below the diagonal
+    for (int i = 0; i < (noseqs - 1); i++)
+    {
+      for (int j = i; j < noseqs; j++)
+      {
+        distance[i][j] = (float) maxscore - distance[i][j];
+        distance[j][i] = distance[i][j];
+      }
+    }
+    return distance;
+  }
+
+  /**
+   * @return the integer score table of the concrete model (NOTE(review):
+   *         indexing scheme is defined by the implementing class - confirm
+   *         there)
+   */
+  public abstract int[][] getMatrix();
+}
\ No newline at end of file
--- /dev/null
+package jalview.analysis.scoremodels;
+
+import jalview.analysis.AlignSeq;
+import jalview.api.analysis.ScoreModelI;
+import jalview.datamodel.AlignmentView;
+import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
+
+/**
+ * Score model that derives distances from the maxscore of pairwise AlignSeq
+ * alignments (described by this class as Smith Waterman scores).
+ */
+public class SWScoreModel implements ScoreModelI
+{
+
+  /**
+   * Align every pair of visible sequences with AlignSeq and convert the
+   * alignment scores into distances by subtracting each from the largest
+   * score found.
+   * 
+   * @param seqData
+   *          view supplying the visible alignment; its isNa() flag selects
+   *          the "dna" or "pep" alignment type
+   * @return square, symmetric matrix with one row and column per sequence
+   */
+  @Override
+  public float[][] findDistances(AlignmentView seqData)
+  {
+    SequenceI[] seqs = seqData.getVisibleAlignment(
+            Comparison.GapChars.charAt(0)).getSequencesArray();
+    int count = seqs.length;
+    float[][] distance = new float[count][count];
+
+    float best = -1;
+
+    // first pass: raw alignment score for each pair (including self pairs
+    // for all but the last sequence), tracking the largest one
+    for (int i = 0; i < (count - 1); i++)
+    {
+      for (int j = i; j < count; j++)
+      {
+        AlignSeq aligner = new AlignSeq(seqs[i], seqs[j],
+                seqData.isNa() ? "dna" : "pep");
+        aligner.calcScoreMatrix();
+        aligner.traceAlignment();
+        // NOTE(review): writes every pairwise alignment to stdout - confirm
+        // whether this output is intended outside of debugging
+        aligner.printAlignment(System.out);
+        distance[i][j] = (float) aligner.maxscore;
+
+        if (best < distance[i][j])
+        {
+          best = distance[i][j];
+        }
+      }
+    }
+
+    // second pass: score -> distance, mirrored below the diagonal
+    for (int i = 0; i < (count - 1); i++)
+    {
+      for (int j = i; j < count; j++)
+      {
+        float dist = best - distance[i][j];
+        distance[i][j] = dist;
+        distance[j][i] = dist;
+      }
+    }
+
+    return distance;
+  }
+
+  /**
+   * @return the fixed model name "Smith Waterman Score"
+   */
+  @Override
+  public String getName()
+  {
+    return "Smith Waterman Score";
+  }
+
+  /**
+   * @return always true
+   */
+  @Override
+  public boolean isDNA()
+  {
+    return true;
+  }
+
+  /**
+   * @return always true
+   */
+  @Override
+  public boolean isProtein()
+  {
+    return true;
+  }
+
+  /**
+   * @return fixed human readable description of this model
+   */
+  @Override
+  public String toString()
+  {
+    return "Score between two sequences aligned with Smith Waterman with default Peptide/Nucleotide matrix";
+  }
+}
--- /dev/null
+package jalview.api.analysis;
+
+import jalview.datamodel.AlignmentView;
+/**
+ * Contract for a named model that computes a matrix of pairwise distances
+ * from the sequences held in an AlignmentView.
+ */
+public interface ScoreModelI
+{
+ /**
+  * Compute pairwise distances between the sequences in the given view.
+  *
+  * @param seqData view providing the sequences to compare
+  * @return distance matrix. NOTE(review): the implementations added with this
+  *         interface all return a square, symmetric array with one row and
+  *         column per sequence - confirm whether that is part of the contract
+  */
+ float[][] findDistances(AlignmentView seqData);
+ /**
+  * @return name of this model
+  */
+ String getName();
+ /**
+  * @return NOTE(review): presumably true when the model applies to
+  *         nucleotide data - confirm against callers
+  */
+ boolean isDNA();
+ /**
+  * @return NOTE(review): presumably true when the model applies to protein
+  *         data - confirm against callers
+  */
+ boolean isProtein();
+
+}
*/
private Vector scGroups;
+ private boolean isNa=false;
+ /**
+  * Report the nucleotide flag held by this view. The flag starts out false
+  * and is assigned from alignment.isNucleotide() elsewhere in this class.
+  *
+  * @return true if the flag marks this view as nucleic acid, false if it
+  *         concerns peptides
+  */
+ public boolean isNa()
+ {
+   return this.isNa;
+ }
+
+
/**
* Group defined over SeqCigars. Unlike AlignmentI associated groups, each
* SequenceGroup hold just the essential properties for the group, but no
(selectedRegionOnly ? selection : null)),
(selectedRegionOnly && selection != null) ? selection
.getStartRes() : 0);
+ isNa = alignment.isNucleotide();
// walk down SeqCigar array and Alignment Array - optionally restricted by
// selected region.
// test group membership for each sequence in each group, store membership
*/
package jalview.schemes;
+import jalview.analysis.scoremodels.PIDScoreModel;
+import jalview.api.analysis.ScoreModelI;
+
import java.util.*;
import java.util.List;
-
import java.awt.*;
public class ResidueProperties
{
- public static Hashtable scoreMatrices = new Hashtable();
+ /**
+  * Registry of score models keyed by name. Holds ScoreMatrix instances as
+  * well as other ScoreModelI implementations (for example the "PID" model).
+  */
+ public static Hashtable<String, ScoreModelI> scoreMatrices = new Hashtable<String, ScoreModelI>();
// Stores residue codes/names and colours and other things
public static final int[] aaIndex; // aaHash version 2.1.1 and below
propHash.put("proline", proline);
propHash.put("polar", polar);
}
+ static
+ {
+ scoreMatrices.put("PID", new PIDScoreModel()); // makes the PID model available through getScoreModel("PID")
+ }
private ResidueProperties()
{
public static ScoreMatrix getScoreMatrix(String pwtype)
{
Object val = scoreMatrices.get(pwtype);
- if (val != null)
+ if (val != null && val instanceof ScoreMatrix) // the registry also holds models that are not ScoreMatrix (e.g. "PID"); those yield null here
{
return (ScoreMatrix) val;
}
return null;
}
+ /**
+  * Look up a registered score model by name.
+  *
+  * @param pwtype
+  *          name the model was registered under
+  * @return the model registered as pwtype, or null when there is none
+  */
+ public static ScoreModelI getScoreModel(String pwtype)
+ {
+   final ScoreModelI model = scoreMatrices.get(pwtype);
+   return model;
+ }
public static int getPAM250(char c, char d)
{
*/
package jalview.schemes;
-public class ScoreMatrix
+import jalview.analysis.scoremodels.PairwiseSeqScoreModel;
+import jalview.api.analysis.ScoreModelI;
+
+public class ScoreMatrix extends PairwiseSeqScoreModel implements ScoreModelI
{
String name;
+
+ /**
+  * @return the name held by this matrix
+  */
+ @Override
+ public String getName()
+ {
+   return this.name;
+ }
/**
* reference to integer score matrix
* 0 for Protein Score matrix. 1 for dna score matrix
*/
int type;
-
+ /**
+ * Create a named score matrix. The matrix array is stored by reference, not copied.
+ * @param name Unique, human readable name for the matrix
+ * @param matrix Pairwise scores indexed according to appropriate symbol alphabet
+ * @param type 0 for Protein, 1 for NA
+ */
ScoreMatrix(String name, int[][] matrix, int type)
{
this.matrix = matrix;
this.type = type;
+ this.name = name;
}
+ @Override // declared by ScoreModelI; true when this matrix was created with type 1
public boolean isDNA()
{
return type == 1;
}
-
+ @Override // declared by ScoreModelI; true when this matrix was created with type 0
public boolean isProtein()
{
return type == 0;
}
+ @Override
public int[][] getMatrix()
{
return matrix;
*/
package jalview.schemes;
+import jalview.api.analysis.ScoreModelI;
+
import java.util.Map;
import org.junit.Test;
@Test
public void printAllMatrices()
{
- for (Map.Entry<String,ScoreMatrix> sm:((Map<String, ScoreMatrix>) ResidueProperties.scoreMatrices).entrySet())
+ for (Map.Entry<String,ScoreModelI> sm: ResidueProperties.scoreMatrices.entrySet())
{
System.out.println("Matrix "+sm.getKey());
System.out.println(sm.getValue().toString());
@Test
public void printHTMLMatrices()
{
- for (Map.Entry<String,ScoreMatrix> sm:((Map<String, ScoreMatrix>) ResidueProperties.scoreMatrices).entrySet())
+ for (Map.Entry<String,ScoreModelI> _sm: ResidueProperties.scoreMatrices.entrySet())
{
- System.out.println("Matrix "+sm.getKey());
- System.out.println(sm.getValue().outputMatrix(true));
+ if (_sm.getValue() instanceof ScoreMatrix) // skip other models: outputMatrix is not declared by ScoreModelI
+ {
+ ScoreMatrix sm = (ScoreMatrix) _sm.getValue();
+ System.out.println("Matrix "+_sm.getKey());
+ System.out.println(sm.outputMatrix(true));
+ }
}
}