public class ResidueProperties
{
+ // alphabet names used in Hidden Markov Model files
+ public static final String ALPHABET_RNA = "RNA";
+
+ public static final String ALPHABET_DNA = "DNA";
+
+ public static final String ALPHABET_AMINO = "amino";
+
// Stores residue codes/names and colours and other things
public static final int[] aaIndex; // aaHash version 2.1.1 and below
// lookup from modified amino acid (e.g. MSE) to canonical form (e.g. MET)
public static final Map<String, String> modifications = new HashMap<>();
+ // residue background frequencies across different alphabets
+ public static final Map<String, Map<Character, Float>> backgroundFrequencies = new HashMap<>();
+
static
{
aaIndex = new int[255];
return peptide;
}
- public static Hashtable<String, String> toDssp3State;
+ /*
+ * lookup, indexed by (symbol - 'A'), of the DSSP3 equivalent of alternative
+ * secondary structure symbols; only 'A'-'I' are covered, and any entry not
+ * assigned explicitly in the initialiser holds ' '
+ */
+ private static char[] toDssp3State;
static
{
- toDssp3State = new Hashtable<>();
- toDssp3State.put("H", "H");
- toDssp3State.put("E", "E");
- toDssp3State.put("C", " ");
- toDssp3State.put(" ", " ");
- toDssp3State.put("T", " ");
- toDssp3State.put("B", "E");
- toDssp3State.put("G", "H");
- toDssp3State.put("I", "H");
- toDssp3State.put("X", " ");
+ toDssp3State = new char[9]; // for 'A'-'I'; extend if needed
+ Arrays.fill(toDssp3State, ' '); // default for symbols not assigned below
+ toDssp3State['B' - 'A'] = 'E';
+ toDssp3State['E' - 'A'] = 'E';
+ toDssp3State['G' - 'A'] = 'H';
+ toDssp3State['H' - 'A'] = 'H';
+ toDssp3State['I' - 'A'] = 'H';
}
/**
* translate from other dssp secondary structure alphabets to 3-state
*
- * @param ssstring
- * @return ssstring as a three-state secondary structure assignment.
+ * @param ssString
+ *          secondary structure symbols, one character per position; may be
+ *          null
+ * @return null if ssString is null, otherwise a string of the same length as
+ *         ssString holding the three-state equivalent of each symbol;
+ *         symbols outside the range of the lookup table become ' '
*/
- public static String getDssp3state(String ssstring)
+ public static String getDssp3state(String ssString)
{
- if (ssstring == null)
+ if (ssString == null)
{
return null;
}
- StringBuffer ss = new StringBuffer();
- for (int i = 0; i < ssstring.length(); i++)
+ int lookupSize = toDssp3State.length;
+ int len = ssString.length();
+ char[] trans = new char[len];
+ for (int i = 0; i < len; i++)
{
- String ssc = ssstring.substring(i, i + 1);
- if (toDssp3State.containsKey(ssc))
+ char c = ssString.charAt(i);
+ int index = c - 'A'; // offset into the 'A'-based lookup table
+ if (index < 0 || index >= lookupSize) // symbol not covered by the table
{
- ss.append(toDssp3State.get(ssc));
+ trans[i] = ' ';
}
else
{
- ss.append(" ");
+ trans[i] = toDssp3State[index];
}
}
- return ss.toString();
+ return new String(trans);
}
static
}
+ static
+ {
+ Map<Character, Float> amino = new HashMap<>(); // residue code -> frequency
+ amino.put('A', 0.0826f);
+ amino.put('Q', 0.0393f);
+ amino.put('L', 0.0965f);
+ amino.put('S', 0.0661f);
+ amino.put('R', 0.0553f);
+ amino.put('E', 0.0674f);
+ amino.put('K', 0.0582f);
+ amino.put('T', 0.0535f);
+ amino.put('N', 0.0406f);
+ amino.put('G', 0.0708f);
+ amino.put('M', 0.0241f);
+ amino.put('W', 0.0109f);
+ amino.put('D', 0.0546f);
+ amino.put('H', 0.0227f);
+ amino.put('F', 0.0386f);
+ amino.put('Y', 0.0292f);
+ amino.put('C', 0.0137f);
+ amino.put('I', 0.0593f);
+ amino.put('P', 0.0472f);
+ amino.put('V', 0.0686f);
+ backgroundFrequencies.put(ALPHABET_AMINO, amino);
+ // TODO: these values don't match https://www.ebi.ac.uk/uniprot/TrEMBLstats - their source is undocumented.
+ // NOTE(review): possibly UniProtKB/Swiss-Prot release statistics instead - confirm and cite the source here.
+ }
+
+ // TODO get correct frequencies (presumably for the uniform 0.25 nucleotide values that follow - confirm)
+
+ static
+ {
+ Map<Character, Float> dna = new HashMap<>(); // base code -> frequency
+ dna.put('A', 0.25f);
+ dna.put('C', 0.25f);
+ dna.put('T', 0.25f);
+ dna.put('G', 0.25f);
+ backgroundFrequencies.put(ALPHABET_DNA, dna);
+
+ }
+
+ static
+ {
+ // background frequencies for the RNA alphabet (uniform placeholder values)
+ Map<Character, Float> rna = new HashMap<>();
+ rna.put('A', 0.25f);
+ rna.put('C', 0.25f);
+ // RNA uses uracil (U) rather than thymine (T); without this entry a lookup
+ // of 'U' returns null and fails when unboxed to a float
+ rna.put('U', 0.25f);
+ // 'T' is retained so that any existing lookup of T under this alphabet
+ // keeps working
+ rna.put('T', 0.25f);
+ rna.put('G', 0.25f);
+ backgroundFrequencies.put(ALPHABET_RNA, rna);
+
+ }
+
public static String getCanonicalAminoAcid(String aA)
{
String canonical = modifications.get(aA);
// main method generates perl representation of residue property hash
// / cut here
+ /**
+ * @j2sIgnore
+ * @param args
+ */
public static void main(String[] args)
{
Hashtable<String, Vector<String>> aaProps = new Hashtable<>();