/*
- * Jalview - A Sequence Alignment Editor and Viewer (Development Version 2.4.1)
- * Copyright (C) 2009 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
+ * This file is part of Jalview.
*
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.datamodel;
-import jalview.schemes.*;
+import jalview.analysis.scoremodels.ScoreMatrix;
+import jalview.schemes.ResidueProperties;
/**
- * DOCUMENT ME!
+ * Encode a sequence as a numeric vector using either classic residue binary
+ * encoding or convolved with residue substitution matrix.
*
* @author $author$
* @version $Revision$
*/
public class BinarySequence extends Sequence
{
+ public class InvalidSequenceTypeException extends Exception // checked exception; thrown by matrixEncode(ScoreMatrix) when the matrix type (DNA vs protein) does not match this sequence
+ {
+
+ public InvalidSequenceTypeException(String string) // string: detail message, handed unchanged to Exception
+ {
+ super(string);
+ }
+
+ } // NOTE(review): declared as a non-static inner class, so each instance is tied to an enclosing BinarySequence - consider making it static
+
int[] binary; // integer encoding; not assigned in the visible code, read element-by-element further down when building a string
double[] dbinary; // numeric encoding filled by encode()/matrixEncode(): getLength() * nores values, nores consecutive slots per residue
+ boolean isNa = false; // true selects the nucleotide index table and size, false the amino-acid ones
+
/**
* Creates a new BinarySequence object.
*
* @param s
- * DOCUMENT ME!
+ * the sequence string to encode (passed on to the Sequence constructor)
*/
- public BinarySequence(String s)
+ public BinarySequence(String s, boolean isNa) // isNa: true if s is a nucleotide sequence, false if protein
{
super("", s, 0, s.length()); // presumably (name, sequence, start, end) - confirm against Sequence
+ this.isNa = isNa;
}
/**
- * DOCUMENT ME!
+ * (re)allocate dbinary as a zero-filled array of getLength() * nores doubles
+ *
+ * @return nores - dimension of sequence symbol encoding for this sequence
*/
- public void encode()
+ private int initMatrixGetNoRes()
{
- // Set all matrix to 0
- dbinary = new double[getSequence().length * 21];
+ int nores = (isNa) ? ResidueProperties.maxNucleotideIndex // NOTE(review): assumes max*Index is the symbol count, not the highest valid index - confirm
+ : ResidueProperties.maxProteinIndex;
- int nores = 21;
+ dbinary = new double[getLength() * nores]; // a new double[] is zero-initialised by Java, so no explicit clearing loop is needed
- for (int i = 0; i < dbinary.length; i++)
- {
- dbinary[i] = 0.0;
- }
+ return nores;
+ }
- for (int i = 0; i < getSequence().length; i++)
+ private int[] getSymbolmatrix() // returns the lookup table, indexed by sequence character, that gives each symbol's index for this sequence type
+ {
+ return (isNa) ? ResidueProperties.nucleotideIndex // nucleotide sequences use the nucleotide table
+ : ResidueProperties.aaIndex; // everything else uses the amino-acid table
+ }
+
+ /**
+ * Binary-encode the sequence: for residue i, set dbinary[i * nores + symbolIndex] to 1.0
+ */
+ public void encode()
+ {
+ int nores = initMatrixGetNoRes();
+ final int[] sindex = getSymbolmatrix();
+ for (int i = 0; i < getLength(); i++)
{
- int aanum = 20;
+ int aanum = nores - 1;
try
{
- aanum = ResidueProperties.aaIndex[getCharAt(i)];
+ aanum = sindex[getCharAt(i)];
} catch (NullPointerException e)
{
- aanum = 20;
+ aanum = nores - 1;
}
- if (aanum > 20)
+ if (aanum >= nores)
{
- aanum = 20;
+ aanum = nores - 1;
}
dbinary[(i * nores) + aanum] = 1.0;
/**
* encode using the given substitution matrix
*
- * @param matrix
- */
- public void matrixEncode(ScoreMatrix matrix)
- {
- matrixEncode(matrix.isDNA() ? ResidueProperties.nucleotideIndex
- : ResidueProperties.aaIndex, matrix.getMatrix());
- }
-
- /**
- * DOCUMENT ME!
+ * @param smtrx
*/
- public void blosumEncode()
+ public void matrixEncode(final ScoreMatrix smtrx) // smtrx: score matrix whose type (DNA vs protein) must match this sequence
+ throws InvalidSequenceTypeException
{
- matrixEncode(ResidueProperties.aaIndex, ResidueProperties.getBLOSUM62());
+ if (isNa != smtrx.isDNA()) // reject a matrix built for the other sequence type
+ {
+ throw new InvalidSequenceTypeException(
+ "matrix " + smtrx.getClass().getCanonicalName()
+ + " is not a valid matrix for "
+ + (isNa ? "nucleotide" : "protein") + " sequences"); // leading space keeps the message from reading "proteinsequences"
+ }
+ matrixEncode(smtrx.isDNA() ? ResidueProperties.nucleotideIndex // pick the symbol index table matching the matrix type
+ : ResidueProperties.aaIndex, smtrx.getMatrix());
}
- private void matrixEncode(int[] aaIndex, int[][] matrix)
+ private void matrixEncode(final int[] aaIndex, final float[][] matrix)
{
- // Set all matrix to 0
- dbinary = new double[getSequence().length * 21];
-
- int nores = 21;
+ int nores = initMatrixGetNoRes();
- // for (int i = 0; i < dbinary.length; i++) {
- // dbinary[i] = 0.0;
- // }
- for (int i = 0; i < getSequence().length; i++)
+ for (int i = 0, iSize = getLength(); i < iSize; i++)
{
- int aanum = 20;
+ int aanum = nores - 1;
try
{
aanum = aaIndex[getCharAt(i)];
} catch (NullPointerException e)
{
- aanum = 20;
+ aanum = nores - 1;
}
- if (aanum > 20)
+ if (aanum >= nores)
{
- aanum = 20;
+ aanum = nores - 1;
}
- // Do the blosum thing
+ // Do the blosum^H^H^H^H^H score matrix summation thing
- for (int j = 0; j < 20; j++)
+ for (int j = 0; j < nores; j++)
{
dbinary[(i * nores) + j] = matrix[aanum][j];
}
for (int i = 0; i < binary.length; i++)
{
- out += (new Integer(binary[i])).toString();
+ out += (Integer.valueOf(binary[i])).toString();
if (i < (binary.length - 1))
{