X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FBinarySequence.java;h=62171f7ec2c049f05a76eac31ea2a1a506272377;hb=b57a02c25e335d033c97f8a6bacd6b54f62bd2b6;hp=04cfdc9ab712a60b21e51a242514802ff0107e3b;hpb=797df64fa2a0a30773d0f48f5494d4155e5a8be3;p=jalview.git diff --git a/src/jalview/datamodel/BinarySequence.java b/src/jalview/datamodel/BinarySequence.java index 04cfdc9..62171f7 100755 --- a/src/jalview/datamodel/BinarySequence.java +++ b/src/jalview/datamodel/BinarySequence.java @@ -20,58 +20,89 @@ package jalview.datamodel; import jalview.schemes.*; /** - * DOCUMENT ME! + * Encode a sequence as a numeric vector using either classic residue binary + * encoding or convolved with residue substitution matrix. * * @author $author$ * @version $Revision$ */ public class BinarySequence extends Sequence { + public class InvalidSequenceTypeException extends Exception + { + + public InvalidSequenceTypeException(String string) + { + super(string); + } + + } + int[] binary; double[] dbinary; + boolean isNa = false; + /** * Creates a new BinarySequence object. * * @param s * DOCUMENT ME! */ - public BinarySequence(String s) + public BinarySequence(String s, boolean isNa) { super("", s, 0, s.length()); + this.isNa = isNa; } /** - * DOCUMENT ME! + * clear the dbinary matrix + * + * @return nores - dimension of sequence symbol encoding for this sequence */ - public void encode() + private int initMatrixGetNoRes() { + int nores = (isNa) ? ResidueProperties.maxNucleotideIndex + : ResidueProperties.maxProteinIndex; // Set all matrix to 0 - dbinary = new double[getSequence().length * 21]; - - int nores = 21; + dbinary = new double[getSequence().length * nores]; for (int i = 0; i < dbinary.length; i++) { dbinary[i] = 0.0; } + return nores; + } + + private int[] getSymbolmatrix() + { + return (isNa) ? ResidueProperties.nucleotideIndex + : ResidueProperties.aaIndex; + } + /** + * DOCUMENT ME! + */ + public void encode() + { + int nores = initMatrixGetNoRes(); + final int[] sindex = getSymbolmatrix(); for (int i = 0; i < getSequence().length; i++) { - int aanum = 20; + int aanum = nores - 1; try { - aanum = ResidueProperties.aaIndex[getCharAt(i)]; + aanum = sindex[getCharAt(i)]; } catch (NullPointerException e) { - aanum = 20; + aanum = nores - 1; } - if (aanum > 20) + if (aanum >= nores) { - aanum = 20; + aanum = nores - 1; } dbinary[(i * nores) + aanum] = 1.0; @@ -83,50 +114,50 @@ public class BinarySequence extends Sequence * * @param matrix */ - public void matrixEncode(ScoreMatrix matrix) + public void matrixEncode(final ScoreMatrix matrix) + throws InvalidSequenceTypeException { + if (isNa != matrix.isDNA()) + { + throw new InvalidSequenceTypeException("matrix " + + matrix.getClass().getCanonicalName() + + " is not a valid matrix for " + + (isNa ? "nucleotide" : "protein") + "sequences"); + } matrixEncode(matrix.isDNA() ? ResidueProperties.nucleotideIndex : ResidueProperties.aaIndex, matrix.getMatrix()); } - /** - * DOCUMENT ME! - */ - public void blosumEncode() - { - matrixEncode(ResidueProperties.aaIndex, ResidueProperties.getBLOSUM62()); - } - - private void matrixEncode(int[] aaIndex, int[][] matrix) + private void matrixEncode(final int[] aaIndex, final int[][] matrix) { // Set all matrix to 0 - dbinary = new double[getSequence().length * 21]; + // dbinary = new double[getSequence().length * 21]; - int nores = 21; + int nores = initMatrixGetNoRes(); // for (int i = 0; i < dbinary.length; i++) { // dbinary[i] = 0.0; // } - for (int i = 0; i < getSequence().length; i++) + for (int i = 0, iSize = getSequence().length; i < iSize; i++) { - int aanum = 20; + int aanum = nores - 1; try { aanum = aaIndex[getCharAt(i)]; } catch (NullPointerException e) { - aanum = 20; + aanum = nores - 1; } - if (aanum > 20) + if (aanum >= nores) { - aanum = 20; + aanum = nores - 1; } - // Do the blosum thing + // Do the blosum^H^H^H^H^H score matrix summation thing - for (int j = 0; j < 20; j++) + for (int j = 0; j < nores; j++) { dbinary[(i * nores) + j] = matrix[aanum][j]; }