X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FBinarySequence.java;h=cea9de7ecec5c0095a40a0ccc7388f63067d525a;hb=HEAD;hp=89d39716ba5dc3d4c54a89f4b41a091be0685c91;hpb=f24dacb1da56fccf05d684e2f4899facec2aecf7;p=jalview.git diff --git a/src/jalview/datamodel/BinarySequence.java b/src/jalview/datamodel/BinarySequence.java index 89d3971..cea9de7 100755 --- a/src/jalview/datamodel/BinarySequence.java +++ b/src/jalview/datamodel/BinarySequence.java @@ -1,180 +1,193 @@ -/* -* Jalview - A Sequence Alignment Editor and Viewer -* Copyright (C) 2005 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version 2 -* of the License, or (at your option) any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA -*/ -package jalview.datamodel; - -import jalview.schemes.*; - - - -/** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ - */ -public class BinarySequence extends Sequence -{ - int[] binary; - double[] dbinary; - - /** - * Creates a new BinarySequence object. - * - * @param s DOCUMENT ME! - */ - public BinarySequence(SequenceI s) - { - super(s.getName(), s.getSequence(), s.getStart(), s.getEnd()); - } - - /** - * Creates a new BinarySequence object. - * - * @param name DOCUMENT ME! - * @param sequence DOCUMENT ME! - * @param start DOCUMENT ME! - * @param end DOCUMENT ME! - */ - public BinarySequence(String name, String sequence, int start, int end) - { - super(name, sequence, start, end); - } - - /** - * DOCUMENT ME! - */ - public void encode() - { - // Set all matrix to 0 - dbinary = new double[getSequence().length() * 21]; - - int nores = 21; - - for (int i = 0; i < dbinary.length; i++) - { - dbinary[i] = 0.0; - } - - for (int i = 0; i < getSequence().length(); i++) - { - int aanum = 20; - - try - { - aanum = ((Integer) ResidueProperties.getAAHash().get(getSequence() - .substring(i, - i + 1))).intValue(); - } - catch (NullPointerException e) - { - aanum = 20; - } - - if (aanum > 20) - { - aanum = 20; - } - - dbinary[(i * nores) + aanum] = 1.0; - } - } - - /** - * DOCUMENT ME! - */ - public void blosumEncode() - { - // Set all matrix to 0 - dbinary = new double[getSequence().length() * 21]; - - int nores = 21; - - //for (int i = 0; i < dbinary.length; i++) { - // dbinary[i] = 0.0; - //} - for (int i = 0; i < getSequence().length(); i++) - { - int aanum = 20; - - try - { - aanum = ((Integer) ResidueProperties.getAAHash().get(getSequence() - .substring(i, - i + 1))).intValue(); - } - catch (NullPointerException e) - { - aanum = 20; - } - - if (aanum > 20) - { - aanum = 20; - } - - // Do the blosum thing - for (int j = 0; j < 20; j++) - { - dbinary[(i * nores) + j] = ResidueProperties.getBLOSUM62()[aanum][j]; - } - } - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public String toBinaryString() - { - String out = ""; - - for (int i = 0; i < binary.length; i++) - { - out += (new Integer(binary[i])).toString(); - - if (i < (binary.length - 1)) - { - out += " "; - } - } - - return out; - } - - /** - * DOCUMENT ME! - * - * @return DOCUMENT ME! - */ - public double[] getDBinary() - { - return dbinary; - } - - /** - * DOCUMENT ME! - * - * @param rt DOCUMENT ME! - */ - public static void printMemory(Runtime rt) - { - System.out.println("DEBUG: Free memory = " + rt.freeMemory()); // log. - } -} +/* + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors + * + * This file is part of Jalview. + * + * Jalview is free software: you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * + * Jalview is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. + */ +package jalview.datamodel; + +import jalview.analysis.scoremodels.ScoreMatrix; +import jalview.schemes.ResidueProperties; + +/** + * Encode a sequence as a numeric vector using either classic residue binary + * encoding or convolved with residue substitution matrix. + * + * @author $author$ + * @version $Revision$ + */ +public class BinarySequence extends Sequence +{ + public class InvalidSequenceTypeException extends Exception + { + + public InvalidSequenceTypeException(String string) + { + super(string); + } + + } + + int[] binary; + + double[] dbinary; + + boolean isNa = false; + + /** + * Creates a new BinarySequence object. + * + * @param s + * DOCUMENT ME! + */ + public BinarySequence(String s, boolean isNa) + { + super("", s, 0, s.length()); + this.isNa = isNa; + } + + /** + * clear the dbinary matrix + * + * @return nores - dimension of sequence symbol encoding for this sequence + */ + private int initMatrixGetNoRes() + { + int nores = (isNa) ? ResidueProperties.maxNucleotideIndex + : ResidueProperties.maxProteinIndex; + + dbinary = new double[getLength() * nores]; + + return nores; + } + + private int[] getSymbolmatrix() + { + return (isNa) ? ResidueProperties.nucleotideIndex + : ResidueProperties.aaIndex; + } + + /** + * DOCUMENT ME! + */ + public void encode() + { + int nores = initMatrixGetNoRes(); + final int[] sindex = getSymbolmatrix(); + for (int i = 0; i < getLength(); i++) + { + int aanum = nores - 1; + + try + { + aanum = sindex[getCharAt(i)]; + } catch (NullPointerException e) + { + aanum = nores - 1; + } + + if (aanum >= nores) + { + aanum = nores - 1; + } + + dbinary[(i * nores) + aanum] = 1.0; + } + } + + /** + * ancode using substitution matrix given in matrix + * + * @param smtrx + */ + public void matrixEncode(final ScoreMatrix smtrx) + throws InvalidSequenceTypeException + { + if (isNa != smtrx.isDNA()) + { + throw new InvalidSequenceTypeException( + "matrix " + smtrx.getClass().getCanonicalName() + + " is not a valid matrix for " + + (isNa ? "nucleotide" : "protein") + "sequences"); + } + matrixEncode(smtrx.isDNA() ? ResidueProperties.nucleotideIndex + : ResidueProperties.aaIndex, smtrx.getMatrix()); + } + + private void matrixEncode(final int[] aaIndex, final float[][] matrix) + { + int nores = initMatrixGetNoRes(); + + for (int i = 0, iSize = getLength(); i < iSize; i++) + { + int aanum = nores - 1; + + try + { + aanum = aaIndex[getCharAt(i)]; + } catch (NullPointerException e) + { + aanum = nores - 1; + } + + if (aanum >= nores) + { + aanum = nores - 1; + } + + // Do the blosum^H^H^H^H^H score matrix summation thing + + for (int j = 0; j < nores; j++) + { + dbinary[(i * nores) + j] = matrix[aanum][j]; + } + } + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public String toBinaryString() + { + String out = ""; + + for (int i = 0; i < binary.length; i++) + { + out += (Integer.valueOf(binary[i])).toString(); + + if (i < (binary.length - 1)) + { + out += " "; + } + } + + return out; + } + + /** + * DOCUMENT ME! + * + * @return DOCUMENT ME! + */ + public double[] getDBinary() + { + return dbinary; + } + +}