From 93b7c00c600a36f81c9a5f251366ed09ceae7597 Mon Sep 17 00:00:00 2001 From: TZVanaalten Date: Wed, 16 Aug 2017 15:50:28 +0100 Subject: [PATCH 1/1] JAL-2599 fix background frequency storage and correct colour scheme --- src/jalview/analysis/AAFrequency.java | 40 ++--------- src/jalview/schemes/HMMERColourScheme.java | 29 ++------ src/jalview/schemes/ResidueProperties.java | 72 ++++++++++---------- .../util/HMMProbabilityDistributionAnalyser.java | 7 +- 4 files changed, 53 insertions(+), 95 deletions(-) diff --git a/src/jalview/analysis/AAFrequency.java b/src/jalview/analysis/AAFrequency.java index 5ea892b..046c589 100755 --- a/src/jalview/analysis/AAFrequency.java +++ b/src/jalview/analysis/AAFrequency.java @@ -853,18 +853,8 @@ public class AAFrequency for (char symbol : hmm.getSymbols()) { float freq = 0f; - if ("amino".equals(hmm.getAlphabetType())) - { - freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); - } - if ("DNA".equals(hmm.getAlphabetType())) - { - freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol); - } - if ("RNA".equals(hmm.getAlphabetType())) - { - freq = ResidueProperties.rnaBackgroundFrequencies.get(symbol); - } + freq = ResidueProperties.backgroundFrequencies + .get(hmm.getAlphabetType()).get(symbol); Double hmmProb = hmm.getMatchEmissionProbability(column, symbol); float prob = hmmProb.floatValue(); informationContent += prob * (Math.log(prob / freq) / Math.log(2)); @@ -952,30 +942,12 @@ public class AAFrequency value = hmm.getMatchEmissionProbability(column, symbol); double freq; - if (AMINO.equals(alph) && removeBelowBackground) + freq = ResidueProperties.backgroundFrequencies.get(alph).get(symbol); + if (value < freq && removeBelowBackground) { - freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); - if (value < freq) - { - value = 0d; - } - } - else if (DNA.equals(alph) && removeBelowBackground) - { - freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol); - if (value < freq) - { - value = 0d; - } - } - else if (RNA.equals(alph) && removeBelowBackground) - { - freq = ResidueProperties.rnaBackgroundFrequencies.get(symbol); - if (value < freq) - { - value = 0d; - } + return 0; } + value = value * 10000; return Math.round(value.floatValue()); } diff --git a/src/jalview/schemes/HMMERColourScheme.java b/src/jalview/schemes/HMMERColourScheme.java index 113afd1..e5b242d 100644 --- a/src/jalview/schemes/HMMERColourScheme.java +++ b/src/jalview/schemes/HMMERColourScheme.java @@ -56,41 +56,26 @@ public class HMMERColourScheme extends ResidueColourScheme Double prob; prob = hmm.getMatchEmissionProbability(position, symbol); double freq = 0; - if ("amino".equals(hmm.getAlphabetType())) + String alpha = hmm.getAlphabetType(); + if (!ResidueProperties.backgroundFrequencies.get(alpha).containsKey(symbol)) { - if (!ResidueProperties.aminoBackgroundFrequencies.containsKey(symbol)) - { - return Color.white; - } - freq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); - } - else if ("DNA".equals(hmm.getAlphabetType())) - { - if (!ResidueProperties.dnaBackgroundFrequencies.containsKey(symbol)) - { - return Color.white; - } - freq = ResidueProperties.dnaBackgroundFrequencies.get(symbol); + return Color.white; } - else if ("RNA".equals(hmm.getAlphabetType())) + else { - if (!ResidueProperties.rnaBackgroundFrequencies.containsKey(symbol)) - { - return Color.white; - } - freq = ResidueProperties.rnaBackgroundFrequencies.get(symbol); + freq = ResidueProperties.backgroundFrequencies.get(alpha).get(symbol); } if (prob == 0) { return new Color(230, 0, 0); } - Double value = Math.log(prob / freq); + Double value = Math.log(prob / freq) / Math.log(2); Color colour = null; if (value > 0) { colour = ColorUtils.getGraduatedColour(value.floatValue(), 0, - Color.WHITE, 4.52f, Color.blue); + Color.WHITE, 6.52f, Color.blue); } else if (value < 0) { diff --git a/src/jalview/schemes/ResidueProperties.java b/src/jalview/schemes/ResidueProperties.java index 8451e8a..a8b20d4 100755 --- a/src/jalview/schemes/ResidueProperties.java +++ b/src/jalview/schemes/ResidueProperties.java @@ -48,14 +48,8 @@ public class ResidueProperties // lookup from modified amino acid (e.g. MSE) to canonical form (e.g. MET) public static final Map modifications = new HashMap<>(); - // amino acid background Frequencies - public static final Map aminoBackgroundFrequencies = new HashMap<>(); - - // dna background Frequencies - public static final Map dnaBackgroundFrequencies = new HashMap<>(); - - // rna background Frequencies - public static final Map rnaBackgroundFrequencies = new HashMap<>(); + // residue background frequencies across different alphabets + public static final Map> backgroundFrequencies = new HashMap<>(); static { @@ -2520,26 +2514,28 @@ public class ResidueProperties static { - aminoBackgroundFrequencies.put('A', 0.0826f); - aminoBackgroundFrequencies.put('Q', 0.0393f); - aminoBackgroundFrequencies.put('L', 0.0965f); - aminoBackgroundFrequencies.put('S', 0.0661f); - aminoBackgroundFrequencies.put('R', 0.0553f); - aminoBackgroundFrequencies.put('E', 0.0674f); - aminoBackgroundFrequencies.put('K', 0.0582f); - aminoBackgroundFrequencies.put('T', 0.0535f); - aminoBackgroundFrequencies.put('N', 0.0406f); - aminoBackgroundFrequencies.put('G', 0.0708f); - aminoBackgroundFrequencies.put('M', 0.0241f); - aminoBackgroundFrequencies.put('W', 0.0109f); - aminoBackgroundFrequencies.put('D', 0.0546f); - aminoBackgroundFrequencies.put('H', 0.0227f); - aminoBackgroundFrequencies.put('F', 0.0386f); - aminoBackgroundFrequencies.put('Y', 0.0292f); - aminoBackgroundFrequencies.put('C', 0.0137f); - aminoBackgroundFrequencies.put('I', 0.0593f); - aminoBackgroundFrequencies.put('P', 0.0472f); - aminoBackgroundFrequencies.put('V', 0.0686f); + Map amino = new HashMap<>(); + amino.put('A', 0.0826f); + amino.put('Q', 0.0393f); + amino.put('L', 0.0965f); + amino.put('S', 0.0661f); + amino.put('R', 0.0553f); + amino.put('E', 0.0674f); + amino.put('K', 0.0582f); + amino.put('T', 0.0535f); + amino.put('N', 0.0406f); + amino.put('G', 0.0708f); + amino.put('M', 0.0241f); + amino.put('W', 0.0109f); + amino.put('D', 0.0546f); + amino.put('H', 0.0227f); + amino.put('F', 0.0386f); + amino.put('Y', 0.0292f); + amino.put('C', 0.0137f); + amino.put('I', 0.0593f); + amino.put('P', 0.0472f); + amino.put('V', 0.0686f); + backgroundFrequencies.put("amino", amino); } @@ -2547,19 +2543,23 @@ public class ResidueProperties static { - dnaBackgroundFrequencies.put('A', 0.25f); - dnaBackgroundFrequencies.put('C', 0.25f); - dnaBackgroundFrequencies.put('T', 0.25f); - dnaBackgroundFrequencies.put('G', 0.25f); + Map dna = new HashMap<>(); + dna.put('A', 0.25f); + dna.put('C', 0.25f); + dna.put('T', 0.25f); + dna.put('G', 0.25f); + backgroundFrequencies.put("DNA", dna); } static { - rnaBackgroundFrequencies.put('A', 0.25f); - rnaBackgroundFrequencies.put('C', 0.25f); - rnaBackgroundFrequencies.put('T', 0.25f); - rnaBackgroundFrequencies.put('G', 0.25f); + Map rna = new HashMap<>(); + rna.put('A', 0.25f); + rna.put('C', 0.25f); + rna.put('T', 0.25f); + rna.put('G', 0.25f); + backgroundFrequencies.put("RNA", rna); } diff --git a/src/jalview/util/HMMProbabilityDistributionAnalyser.java b/src/jalview/util/HMMProbabilityDistributionAnalyser.java index 24b3bc3..c84b177 100644 --- a/src/jalview/util/HMMProbabilityDistributionAnalyser.java +++ b/src/jalview/util/HMMProbabilityDistributionAnalyser.java @@ -372,7 +372,7 @@ public class HMMProbabilityDistributionAnalyser alignPos = hmm.getNodeAlignmentColumn(length); symbol = sequences.get(width).getCharAt(alignPos); - if (ResidueProperties.aminoBackgroundFrequencies + if (ResidueProperties.backgroundFrequencies.get("amino") .containsKey(symbol)) { count++; @@ -406,14 +406,15 @@ public class HMMProbabilityDistributionAnalyser alignPos = hmm.getNodeAlignmentColumn(length); symbol = sequences.get(width).getCharAt(alignPos); - if (ResidueProperties.aminoBackgroundFrequencies + if (ResidueProperties.backgroundFrequencies.get("amino") .containsKey(symbol)) { Double prob; Float bfreq; Double llr; prob = hmm.getMatchEmissionProbability(alignPos, symbol); - bfreq = ResidueProperties.aminoBackgroundFrequencies.get(symbol); + bfreq = ResidueProperties.backgroundFrequencies.get("amino") + .get(symbol); if (prob == 0 || bfreq == 0) { System.out.println("error"); -- 1.7.10.2