X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fschemes%2FResidueProperties.java;h=087c16a4bfc78bf02ac30a194f9f637b9e1ae032;hb=8775e0baba5a310900582fc0eb0d277937f6c0f7;hp=bf30ed6175223d7488e179d476817cd35a1070a6;hpb=a97dbd3e6e9707de13f47c66beaa15df8ea24d0e;p=jalview.git diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index bf30ed6..087c16a 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -34,6 +34,13 @@ import java.util.Vector; public class ResidueProperties { + // alphabet names used in Hidden Markov Model files + public static final String ALPHABET_RNA = "RNA"; + + public static final String ALPHABET_DNA = "DNA"; + + public static final String ALPHABET_AMINO = "amino"; + // Stores residue codes/names and colours and other things public static final int[] aaIndex; // aaHash version 2.1.1 and below @@ -50,6 +57,9 @@ public class ResidueProperties // lookup from modified amino acid (e.g. MSE) to canonical form (e.g. MET) public static final Map modifications = new HashMap<>(); + // residue background frequencies across different alphabets + public static final Map> backgroundFrequencies = new HashMap<>(); + static { aaIndex = new int[255]; @@ -905,47 +915,51 @@ public class ResidueProperties return peptide; } - public static Hashtable toDssp3State; + /* + * lookup of (A-Z) alternative secondary structure symbols' + * equivalents in DSSP3 notation + */ + private static char[] toDssp3State; static { - toDssp3State = new Hashtable<>(); - toDssp3State.put("H", "H"); - toDssp3State.put("E", "E"); - toDssp3State.put("C", " "); - toDssp3State.put(" ", " "); - toDssp3State.put("T", " "); - toDssp3State.put("B", "E"); - toDssp3State.put("G", "H"); - toDssp3State.put("I", "H"); - toDssp3State.put("X", " "); + toDssp3State = new char[9]; // for 'A'-'I'; extend if needed + Arrays.fill(toDssp3State, ' '); + toDssp3State['B' - 'A'] = 'E'; + toDssp3State['E' - 'A'] = 'E'; + toDssp3State['G' - 'A'] = 'H'; + toDssp3State['H' - 'A'] = 'H'; + toDssp3State['I' - 'A'] = 'H'; } /** * translate from other dssp secondary structure alphabets to 3-state * - * @param ssstring - * @return ssstring as a three-state secondary structure assignment. + * @param ssString + * @return ssstring */ - public static String getDssp3state(String ssstring) + public static String getDssp3state(String ssString) { - if (ssstring == null) + if (ssString == null) { return null; } - StringBuffer ss = new StringBuffer(); - for (int i = 0; i < ssstring.length(); i++) + int lookupSize = toDssp3State.length; + int len = ssString.length(); + char[] trans = new char[len]; + for (int i = 0; i < len; i++) { - String ssc = ssstring.substring(i, i + 1); - if (toDssp3State.containsKey(ssc)) + char c = ssString.charAt(i); + int index = c - 'A'; + if (index < 0 || index >= lookupSize) { - ss.append(toDssp3State.get(ssc)); + trans[i] = ' '; } else { - ss.append(" "); + trans[i] = toDssp3State[index]; } } - return ss.toString(); + return new String(trans); } static @@ -2266,6 +2280,58 @@ public class ResidueProperties } + static + { + Map amino = new HashMap<>(); + amino.put('A', 0.0826f); + amino.put('Q', 0.0393f); + amino.put('L', 0.0965f); + amino.put('S', 0.0661f); + amino.put('R', 0.0553f); + amino.put('E', 0.0674f); + amino.put('K', 0.0582f); + amino.put('T', 0.0535f); + amino.put('N', 0.0406f); + amino.put('G', 0.0708f); + amino.put('M', 0.0241f); + amino.put('W', 0.0109f); + amino.put('D', 0.0546f); + amino.put('H', 0.0227f); + amino.put('F', 0.0386f); + amino.put('Y', 0.0292f); + amino.put('C', 0.0137f); + amino.put('I', 0.0593f); + amino.put('P', 0.0472f); + amino.put('V', 0.0686f); + backgroundFrequencies.put(ALPHABET_AMINO, amino); + // todo: these don't match https://www.ebi.ac.uk/uniprot/TrEMBLstats - what + // are they? + } + + // TODO get correct frequencies + + static + { + Map dna = new HashMap<>(); + dna.put('A', 0.25f); + dna.put('C', 0.25f); + dna.put('T', 0.25f); + dna.put('G', 0.25f); + backgroundFrequencies.put(ALPHABET_DNA, dna); + + } + + static + { + Map rna = new HashMap<>(); + rna.put('A', 0.25f); + rna.put('C', 0.25f); + rna.put('T', 0.25f); + rna.put('G', 0.25f); + backgroundFrequencies.put(ALPHABET_RNA, rna); + + } + public static String getCanonicalAminoAcid(String aA) { String canonical = modifications.get(aA);