X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fschemes%2FResidueProperties.java;h=593e83db10bbd1eefe0a75b1fbd4c8012aeb4811;hb=ad15cff29620f960119f80176f1fd443da9f6763;hp=99fc074c7bb60a3e6f815d679f85272a0fde11d7;hpb=a86681109f3b9be838ae3fb2dd9d6db544be4bce;p=jalview.git

diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java
index 99fc074..593e83d 100755
--- a/src/jalview/schemes/ResidueProperties.java
+++ b/src/jalview/schemes/ResidueProperties.java
@@ -1,29 +1,40 @@
 /*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7)
- * Copyright (C) 2011 J Procter, AM Waterhouse, J Engelhardt, LM Lui, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
  * 
  * This file is part of Jalview.
  * 
  * Jalview is free software: you can redistribute it and/or
  * modify it under the terms of the GNU General Public License 
- * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
- * 
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *  
  * Jalview is distributed in the hope that it will be useful, but 
  * WITHOUT ANY WARRANTY; without even the implied warranty 
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
  * PURPOSE.  See the GNU General Public License for more details.
  * 
- * You should have received a copy of the GNU General Public License along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU General Public License
+ * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
  */
 package jalview.schemes;
 
-import java.util.*;
+import jalview.analysis.scoremodels.FeatureScoreModel;
+import jalview.analysis.scoremodels.PIDScoreModel;
+import jalview.api.analysis.ScoreModelI;
 
-import java.awt.*;
+import java.awt.Color;
+import java.util.ArrayList;
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+import java.util.Vector;
 
 public class ResidueProperties
 {
-  public static Hashtable scoreMatrices = new Hashtable();
+  public static Hashtable<String, ScoreModelI> scoreMatrices = new Hashtable<String, ScoreModelI>();
 
   // Stores residue codes/names and colours and other things
   public static final int[] aaIndex; // aaHash version 2.1.1 and below
@@ -98,14 +109,15 @@ public class ResidueProperties
   }
 
   /**
-   * maximum (gap) index for matrices involving protein alphabet 
+   * maximum (gap) index for matrices involving protein alphabet
    */
-  public final static int maxProteinIndex=23;
+  public final static int maxProteinIndex = 23;
+
   /**
-   * maximum (gap) index for matrices involving nucleotide alphabet 
+   * maximum (gap) index for matrices involving nucleotide alphabet
    */
-  public final static int maxNucleotideIndex=10;
-  
+  public final static int maxNucleotideIndex = 10;
+
   static
   {
     nucleotideIndex = new int[255];
@@ -586,22 +598,22 @@ public class ResidueProperties
   // Will equate sequences if working with mixed nucleotide sets.
   // treats T and U identically. R and Y weak equivalence with AG and CTU.
// N matches any other base weakly - // + // static final int[][] DNA = { - { 10, -8, -8, -8, -8, 1, 1, 1, -8, 1, 1 }, // A - { -8, 10, -8, -8, -8, 1, 1, -8, 1, 1, 1 }, // C - { -8, -8, 10, -8, -8, 1, 1, 1, -8, 1, 1 }, // G - { -8, -8, -8, 10, 10, 1, 1, -8, 1, 1, 1 }, // T - { -8, -8, -8, 10, 10, 1, 1, -8, 1, 1, 1 }, // U - { 1, 1, 1, 1, 1, 10, 0, 0, 0, 1, 1 }, // I - { 1, 1, 1, 1, 1, 0, 10, 0, 0, 1, 1 }, // X - { 1, -8, 1, -8, -8, 0, 0, 10, -8, 1, 1 }, // R - { -8, 1, -8, 1, 1, 0, 0, -8, 10, 1, 1 }, // Y - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1 }, // N - { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // - + { 10, -8, -8, -8, -8, 1, 1, 1, -8, 1, 1 }, // A + { -8, 10, -8, -8, -8, 1, 1, -8, 1, 1, 1 }, // C + { -8, -8, 10, -8, -8, 1, 1, 1, -8, 1, 1 }, // G + { -8, -8, -8, 10, 10, 1, 1, -8, 1, 1, 1 }, // T + { -8, -8, -8, 10, 10, 1, 1, -8, 1, 1, 1 }, // U + { 1, 1, 1, 1, 1, 10, 0, 0, 0, 1, 1 }, // I + { 1, 1, 1, 1, 1, 0, 10, 0, 0, 1, 1 }, // X + { 1, -8, 1, -8, -8, 0, 0, 10, -8, 1, 1 }, // R + { -8, 1, -8, 1, 1, 0, 0, -8, 10, 1, 1 }, // Y + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 1 }, // N + { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }, // - }; -/** + /** * register matrices in list */ static @@ -609,7 +621,7 @@ public class ResidueProperties scoreMatrices.put("BLOSUM62", new ScoreMatrix("BLOSUM62", BLOSUM62, 0)); scoreMatrices.put("PAM250", new ScoreMatrix("PAM250", PAM250, 0)); scoreMatrices.put("DNA", new ScoreMatrix("DNA", DNA, 1)); - + } public static final Color[] pidColours = @@ -689,22 +701,136 @@ public class ResidueProperties codonHash.put("STOP", STOP); } - public static Hashtable codonHash2 = new Hashtable(); + /** + * Nucleotide Ambiguity Codes + */ + public static final Hashtable ambiguityCodes = new Hashtable(); + + /** + * Codon triplets with additional symbols for unambiguous codons that include + * ambiguity codes + */ + public static final Hashtable codonHash2 = new Hashtable(); + + /** + * all ambiguity codes for a given base + */ + public final static Hashtable> _ambiguityCodes 
= new Hashtable>(); static { + /** + * 3.2. Purine (adenine or guanine): R + * + * R is the symbol previously recommended [1]. + */ + ambiguityCodes.put("R", new String[] + { "A", "G" }); + + /** + * 3.3. Pyrimidine (thymine or cytosine): Y + * + * Y is the symbol previously recommended [1]. + */ + ambiguityCodes.put("Y", new String[] + { "T", "C" }); + /** + * 3.4. Adenine or thymine: W + * + * Although several diverse symbols have been used for this pair, (and for + * the reciprocal pair G+C), only two symbols have a rational basis, L and + * W: L derives from DNA density (light; G+C - heavy - would thus be H); W + * derives from the strength of the hydrogen bonding interaction between the + * base pairs (weak for A+T: G +C - strong - would thus be S). However, the + * system recommended for the three-base series (not-A = B, etc., see below, + * section 3.8) rules out H as this would be not-G. W is thus recommended. + */ + ambiguityCodes.put("W", new String[] + { "A", "T" }); + /** + * 3.5. Guanine or cytosine: S + * + * The choice of this symbol is discussed above in section 3.4. + */ + ambiguityCodes.put("S", new String[] + { "G", "C" }); + /** + * 3.6. Adenine or cytosine: M + * + * There are few common features between A and C. The presence of an NH2 + * group in similar positions on both bases (Fig. 1) makes possible a + * logically derived symbol. A and N being ruled out, M (from aMino) is + * recommended. + * + * + * Fig. 1. Origin of the symbols M and K The four bases are drawn so as to + * show the relationship between adenine and cytosine on the one hand, which + * both have aMino groups at the ring position most distant from the point + * of attachment to the sugar, and between guanine and thymine on the other, + * which both have Keto groups at the corresponding position. 
The ring atoms + * are numbered as recommended [24-26], although for the present purpose + * this has the disadvantage of giving discordant numbers to the + * corresponding positions. + */ + ambiguityCodes.put("M", new String[] + { "A", "C" }); + /** + * 3.7. Guanine or thymine: K By analogy with A and C (section 3.6), both G + * and T have Keto groups in similar positions (Fig. 1). + */ + ambiguityCodes.put("K", new String[] + { "G", "T" }); + /** + * 3.8. Adenine or thymine or cytosine: H + * + * Not-G is the most simple means of memorising this combination and symbols + * logically related to G were examined. F and H would both be suitable, as + * the letters before and after G in the alphabet, but A would have no + * equivalent to F. The use of H has historical precedence [2]. + */ + ambiguityCodes.put("H", new String[] + { "A", "T", "C" }); + /** + * 3.9. Guanine or cytosine or thymine: B + * + * Not-A as above (section 3.8). + */ + ambiguityCodes.put("B", new String[] + { "G", "T", "C" }); + /** + * 3.10. Guanine or adenine or cytosine: V + * + * Not-T by analogy with not-G (section 3.8) would be U but this is ruled + * out to eliminate confusion with uracil. V is the next logical choice. + * Note that T and U may in some cases be considered to be synonyms. + */ + ambiguityCodes.put("V", new String[] + { "G", "A", "C" }); + /** + * 3.11. Guanine or adenine or thymine: D + * + * Not-C as above (section 3.8). + */ + ambiguityCodes.put("D", new String[] + { "G", "A", "T" }); + /** + * 3.12. 
Guanine or adenine or thymine or cytosine: N + */ + ambiguityCodes.put("R", new String[] + { "G", "A", "T", "C" }); + // Now build codon translation table codonHash2.put("AAA", "K"); codonHash2.put("AAG", "K"); codonHash2.put("AAC", "N"); codonHash2.put("AAT", "N"); - codonHash2.put("CAA", "E"); - codonHash2.put("CAG", "E"); + codonHash2.put("CAA", "Q"); + codonHash2.put("CAG", "Q"); codonHash2.put("CAC", "H"); codonHash2.put("CAT", "H"); - codonHash2.put("GAA", "Q"); - codonHash2.put("GAG", "Q"); + codonHash2.put("GAA", "E"); + codonHash2.put("GAG", "E"); codonHash2.put("GAC", "D"); codonHash2.put("GAT", "D"); @@ -712,9 +838,9 @@ public class ResidueProperties codonHash2.put("TAT", "Y"); codonHash2.put("ACA", "T"); - codonHash2.put("AAG", "T"); codonHash2.put("ACC", "T"); codonHash2.put("ACT", "T"); + codonHash2.put("ACG", "T"); codonHash2.put("CCA", "P"); codonHash2.put("CCG", "P"); @@ -774,6 +900,146 @@ public class ResidueProperties codonHash2.put("TTC", "F"); codonHash2.put("TTT", "F"); + + buildAmbiguityCodonSet(); + } + + /** + * programmatic generation of codons including ambiguity codes + */ + public static void buildAmbiguityCodonSet() + { + if (_ambiguityCodes.size() > 0) + { + System.err + .println("Ignoring multiple calls to buildAmbiguityCodonSet"); + return; + } + // Invert the ambiguity code set + for (Map.Entry acode : ambiguityCodes.entrySet()) + { + for (String r : acode.getValue()) + { + List codesfor = _ambiguityCodes.get(r); + if (codesfor == null) + { + _ambiguityCodes.put(r, codesfor = new ArrayList()); + } + if (!codesfor.contains(acode.getKey())) + { + codesfor.add(acode.getKey()); + } + else + { + System.err + .println("Inconsistency in the IUBMB ambiguity code nomenclature table: collision for " + + acode.getKey() + " in residue " + r); + } + } + } + // and programmatically add in the ambiguity codes that yield the same amino + // acid + String[] unambcodons = codonHash2.keySet().toArray( + new String[codonHash2.size()]); + for (String 
codon : unambcodons) + { + String residue = codonHash2.get(codon); + String acodon[][] = new String[codon.length()][]; + for (int i = 0, iSize = codon.length(); i < iSize; i++) + { + String _ac = "" + codon.charAt(i); + List acodes = _ambiguityCodes.get(_ac); + if (acodes != null) + { + acodon[i] = acodes.toArray(new String[acodes.size()]); + } + else + { + acodon[i] = new String[] + {}; + } + } + // enumerate all combinations and test for veracity of translation + int tpos[] = new int[codon.length()], cpos[] = new int[codon.length()]; + for (int i = 0; i < tpos.length; i++) + { + tpos[i] = -1; + } + tpos[acodon.length - 1] = 0; + int ipos, j; + while (tpos[0] < acodon[0].length) + { + // make all codons for this combination + char allres[][] = new char[tpos.length][]; + String _acodon = ""; + char _anuc; + for (ipos = 0; ipos < tpos.length; ipos++) + { + if (acodon[ipos].length == 0 || tpos[ipos] < 0) + { + _acodon += codon.charAt(ipos); + allres[ipos] = new char[] + { codon.charAt(ipos) }; + } + else + { + _acodon += acodon[ipos][tpos[ipos]]; + String[] altbase = ambiguityCodes.get(acodon[ipos][tpos[ipos]]); + allres[ipos] = new char[altbase.length]; + j = 0; + for (String ab : altbase) + { + allres[ipos][j++] = ab.charAt(0); + } + } + } + // test all codons for this combination + for (ipos = 0; ipos < cpos.length; ipos++) + { + cpos[ipos] = 0; + } + boolean valid = true; + do + { + String _codon = ""; + for (j = 0; j < cpos.length; j++) + { + _codon += allres[j][cpos[j]]; + } + String tr = codonHash2.get(_codon); + if (valid = (tr != null && tr.equals(residue))) + { + // advance to next combination + ipos = acodon.length - 1; + while (++cpos[ipos] >= allres[ipos].length && ipos > 0) + { + cpos[ipos] = 0; + ipos--; + } + } + } while (valid && cpos[0] < allres[0].length); + if (valid) + { + // Add this to the set of codons we will translate + // System.out.println("Adding ambiguity codon: " + _acodon + " for " + // + residue); + codonHash2.put(_acodon, residue); + 
} + else + { + // System.err.println("Rejecting ambiguity codon: " + _acodon + // + " for " + residue); + } + // next combination + ipos = acodon.length - 1; + while (++tpos[ipos] >= acodon[ipos].length && ipos > 0) + { + tpos[ipos] = -1; + ipos--; + } + } + } + } static @@ -1161,6 +1427,80 @@ public class ResidueProperties propHash.put("proline", proline); propHash.put("polar", polar); } + static + { + int[][] propMatrixF = new int[maxProteinIndex][maxProteinIndex], propMatrixPos = new int[maxProteinIndex][maxProteinIndex], propMatrixEpos = new int[maxProteinIndex][maxProteinIndex]; + for (int i = 0; i < maxProteinIndex; i++) + { + int maxF = 0, maxP = 0, maxEP = 0; + String ic = ""; + if (aa.length > i) + { + ic += aa[i]; + } + else + { + ic = "-"; + } + for (int j = i + 1; j < maxProteinIndex; j++) + { + String jc = ""; + if (aa.length > j) + { + jc += aa[j]; + } + else + { + jc = "-"; + } + propMatrixF[i][j] = 0; + propMatrixPos[i][j] = 0; + propMatrixEpos[i][j] = 0; + for (Enumeration en = propHash.keys(); en + .hasMoreElements();) + { + String ph = en.nextElement(); + Map pph = (Map) propHash + .get(ph); + if (pph.get(ic) != null && pph.get(jc) != null) + { + int icp = pph.get(ic).intValue(), jcp = pph.get(jc).intValue(); + // Still working on these definitions. + propMatrixPos[i][j] += icp == jcp && icp > 0 ? 2 : 0; + propMatrixPos[j][i] += icp == jcp && icp > 0 ? 2 : 0; + propMatrixF[i][j] += icp == jcp ? 2 : 0; + propMatrixF[j][i] += icp == jcp ? 2 : 0; + propMatrixEpos[i][j] += icp == jcp ? (1 + icp * 2) : 0; + propMatrixEpos[j][i] += icp == jcp ? 
(1 + icp * 2) : 0; + } + } + if (maxF < propMatrixF[i][j]) + { + maxF = propMatrixF[i][j]; + } + if (maxP < propMatrixPos[i][j]) + { + maxP = propMatrixPos[i][j]; + } + if (maxEP < propMatrixEpos[i][j]) + { + maxEP = propMatrixEpos[i][j]; + } + } + propMatrixF[i][i] = maxF; + propMatrixPos[i][i] = maxP; + propMatrixEpos[i][i] = maxEP; + } + // JAL-1512 comment out physicochemical score matrices for 2.8.1 release + // scoreMatrices.put("Conservation Pos", new + // ScoreMatrix("Conservation Pos",propMatrixPos,0)); + // scoreMatrices.put("Conservation Both", new + // ScoreMatrix("Conservation Both",propMatrixF,0)); + // scoreMatrices.put("Conservation EnhPos", new + // ScoreMatrix("Conservation EnhPos",propMatrixEpos,0)); + scoreMatrices.put("PID", new PIDScoreModel()); + scoreMatrices.put("Displayed Features", new FeatureScoreModel()); + } private ResidueProperties() { @@ -1231,6 +1571,20 @@ public class ResidueProperties public static String codonTranslate(String lccodon) { + if (false) + { + return _codonTranslate(lccodon); + } + String cdn = codonHash2.get(lccodon.toUpperCase()); + if (cdn != null && cdn.equals("*")) + { + return "STOP"; + } + return cdn; + } + + public static String _codonTranslate(String lccodon) + { String codon = lccodon.toUpperCase(); // all base ambiguity codes yield an 'X' amino acid residue if (codon.indexOf('X') > -1 || codon.indexOf('N') > -1) @@ -1272,13 +1626,24 @@ public class ResidueProperties public static ScoreMatrix getScoreMatrix(String pwtype) { Object val = scoreMatrices.get(pwtype); - if (val != null) + if (val != null && val instanceof ScoreMatrix) { return (ScoreMatrix) val; } return null; } + /** + * get a ScoreModel based on its string name + * + * @param pwtype + * @return scoremodel of type pwtype or null + */ + public static ScoreModelI getScoreModel(String pwtype) + { + return scoreMatrices.get(pwtype); + } + public static int getPAM250(char c, char d) { int a = aaIndex[c]; @@ -1336,12 +1701,81 @@ public class 
ResidueProperties * Used by getRNASecStrucState * */ - public static Hashtable toRNAssState; + public static Hashtable toRNAssState; + + public static boolean RNAcloseParen[] = new boolean[255]; static { - toRNAssState = new Hashtable(); - toRNAssState.put(")", "S"); - toRNAssState.put("(", "S"); + toRNAssState = new Hashtable(); + toRNAssState.put(")", "("); + toRNAssState.put("(", "("); + toRNAssState.put("]", "["); + toRNAssState.put("[", "["); + toRNAssState.put("{", "{"); + toRNAssState.put("}", "{"); + toRNAssState.put(">", ">"); + toRNAssState.put("<", ">"); + toRNAssState.put("A", "A"); + toRNAssState.put("a", "A"); + toRNAssState.put("B", "B"); + toRNAssState.put("b", "B"); + toRNAssState.put("C", "C"); + toRNAssState.put("c", "C"); + toRNAssState.put("D", "D"); + toRNAssState.put("d", "D"); + toRNAssState.put("E", "E"); + toRNAssState.put("e", "E"); + toRNAssState.put("F", "F"); + toRNAssState.put("f", "F"); + toRNAssState.put("G", "G"); + toRNAssState.put("g", "G"); + toRNAssState.put("H", "H"); + toRNAssState.put("h", "H"); + toRNAssState.put("I", "I"); + toRNAssState.put("i", "I"); + toRNAssState.put("J", "J"); + toRNAssState.put("j", "J"); + toRNAssState.put("K", "K"); + toRNAssState.put("k", "K"); + toRNAssState.put("L", "L"); + toRNAssState.put("l", "L"); + toRNAssState.put("M", "M"); + toRNAssState.put("m", "M"); + toRNAssState.put("N", "N"); + toRNAssState.put("n", "N"); + toRNAssState.put("O", "O"); + toRNAssState.put("o", "O"); + toRNAssState.put("P", "P"); + toRNAssState.put("p", "P"); + toRNAssState.put("Q", "Q"); + toRNAssState.put("q", "Q"); + toRNAssState.put("R", "R"); + toRNAssState.put("r", "R"); + toRNAssState.put("S", "S"); + toRNAssState.put("s", "S"); + toRNAssState.put("T", "T"); + toRNAssState.put("t", "T"); + toRNAssState.put("U", "U"); + toRNAssState.put("u", "U"); + toRNAssState.put("V", "V"); + toRNAssState.put("v", "V"); + toRNAssState.put("W", "W"); + toRNAssState.put("w", "W"); + toRNAssState.put("X", "X"); + 
toRNAssState.put("x", "X"); + toRNAssState.put("Y", "Y"); + toRNAssState.put("y", "Y"); + toRNAssState.put("Z", "Z"); + toRNAssState.put("z", "Z"); + for (int p = 0; p < RNAcloseParen.length; p++) + { + RNAcloseParen[p] = false; + } + for (String k : toRNAssState.keySet()) + { + RNAcloseParen[k.charAt(0)] = k.charAt(0) != toRNAssState.get(k) + .charAt(0); + } } /** @@ -1362,7 +1796,8 @@ public class ResidueProperties String ssc = ssstring.substring(i, i + 1); if (toRNAssState.containsKey(ssc)) { - ss.append((String) toRNAssState.get(ssc)); + // valid ss character - so return it + ss.append(ssc); // (String) toRNAssState.get(ssc)); } else { @@ -1372,6 +1807,11 @@ public class ResidueProperties return ss.toString(); } + public static boolean isCloseParenRNA(char dc) + { + return RNAcloseParen[dc]; + } + // main method generates perl representation of residue property hash // / cut here public static void main(String[] args) @@ -1421,4 +1861,5 @@ public class ResidueProperties System.out.println("};"); } // to here + }