public class ResidueProperties
{
- // alphabet names used in Hidden Markov Model files; each one is also used as a key into backgroundFrequencies
- public static final String ALPHABET_RNA = "RNA";
-
- public static final String ALPHABET_DNA = "DNA";
-
- public static final String ALPHABET_AMINO = "amino"; // note: lower case, unlike "RNA" and "DNA"
-
// Stores residue codes/names and colours and other things
// aaIndex is left unassigned here; it is allocated in a static initialiser.
public static final int[] aaIndex; // aaHash version 2.1.1 and below
// lookup from modified amino acid (e.g. MSE) to canonical form (e.g. MET);
// read by getCanonicalAminoAcid
public static final Map<String, String> modifications = new HashMap<>();
- // residue background frequencies across different alphabets
- public static final Map<String, Map<Character, Float>> backgroundFrequencies = new HashMap<>(); // outer key: alphabet name (ALPHABET_AMINO / ALPHABET_DNA / ALPHABET_RNA); inner map: residue character -> frequency
-
-
// Allocates the aaIndex lookup table when the class is initialised.
static
{
// 255 slots, so the valid indices are 0-254
aaIndex = new int[255];
}
- static // registers the amino acid background frequencies under ALPHABET_AMINO
- {
- Map<Character, Float> amino = new HashMap<>(); // one-letter residue code -> frequency; the 20 values below sum to roughly 1 (about 0.999)
- amino.put('A', 0.0826f);
- amino.put('Q', 0.0393f);
- amino.put('L', 0.0965f);
- amino.put('S', 0.0661f);
- amino.put('R', 0.0553f);
- amino.put('E', 0.0674f);
- amino.put('K', 0.0582f);
- amino.put('T', 0.0535f);
- amino.put('N', 0.0406f);
- amino.put('G', 0.0708f);
- amino.put('M', 0.0241f);
- amino.put('W', 0.0109f);
- amino.put('D', 0.0546f);
- amino.put('H', 0.0227f);
- amino.put('F', 0.0386f);
- amino.put('Y', 0.0292f);
- amino.put('C', 0.0137f);
- amino.put('I', 0.0593f);
- amino.put('P', 0.0472f);
- amino.put('V', 0.0686f);
- backgroundFrequencies.put(ALPHABET_AMINO, amino);
- // todo: these don't match https://www.ebi.ac.uk/uniprot/TrEMBLstats - what
- // are they? NOTE(review): the values look close to the UniProtKB/Swiss-Prot amino acid composition statistics - confirm the source
- }
-
- // TODO get correct frequencies (the DNA and RNA maps below use a uniform 0.25 for every base)
-
- static // registers the DNA background frequencies under ALPHABET_DNA
- {
- Map<Character, Float> dna = new HashMap<>(); // base character -> frequency
- dna.put('A', 0.25f); // all four bases get the same value, i.e. a uniform distribution
- dna.put('C', 0.25f);
- dna.put('T', 0.25f);
- dna.put('G', 0.25f);
- backgroundFrequencies.put(ALPHABET_DNA, dna);
-
- }
-
- static // registers the RNA background frequencies under ALPHABET_RNA
- {
- Map<Character, Float> rna = new HashMap<>(); // base character -> frequency
- rna.put('A', 0.25f); // all four bases get the same value, i.e. a uniform distribution
- rna.put('C', 0.25f);
- rna.put('T', 0.25f); // NOTE(review): RNA is normally written with 'U' rather than 'T'; a lookup of 'U' finds no entry here - confirm 'T' is intended
- rna.put('G', 0.25f);
- backgroundFrequencies.put(ALPHABET_RNA, rna);
-
- }
-
public static String getCanonicalAminoAcid(String aA)
{
String canonical = modifications.get(aA);