label.colourScheme_nucleotide = Nucleotide
label.colourScheme_t-coffee_scores = T-Coffee Scores
label.colourScheme_rna_helices = By RNA Helices
+label.colourScheme_hmmer-uniprot = HMMER profile v global background
+label.colourScheme_hmmer-alignment = HMMER profile v alignment background
label.blc = BLC
label.fasta = Fasta
label.msf = MSF
nodeLookup.put(column, node);
}
+ /**
+ * Updates the mapping of nodes of the HMM to non-gapped positions of the
+ * sequence. Nodes 1, 2, 3... are mapped to the columns occupied by the first,
+ * second, third... residues of the sequence. The 'begin' node (node 0) of the
+ * HMM is not mapped.
+ *
+ * @param sequence
+ */
public void updateMapping(char[] sequence)
{
int nodeNo = 1;
import jalview.util.QuickSort;
import jalview.util.SparseCount;
+import java.util.List;
+
/**
* A class to count occurrences of residues in a profile, optimised for speed
* and memory footprint.
}
/**
+ * A constructor that counts frequency of all symbols (including gaps) in the
+ * sequences (not case-sensitive)
+ *
+ * @param sequences
+ */
+ public ResidueCount(List<SequenceI> sequences)
+ {
+ this();
+ for (SequenceI seq : sequences)
+ {
+ for (int i = 0; i < seq.getLength(); i++)
+ {
+ add(seq.getCharAt(i));
+ }
+ }
+ }
+
+ /**
* Increments the count for the given character. The supplied character may be
* upper or lower case but counts are for the upper case only. Gap characters
* (space, ., -) are all counted together.
return sb.toString();
}
+ /**
+ * Answers the total count for all symbols (excluding gaps)
+ *
+ * @return
+ */
public int getTotalCount()
{
int total = 0;
for (char symbol : this.getSymbolCounts().symbols)
{
- if (!Comparison.isGap(symbol))
- {
- total += getCount(symbol);
- }
+ total += getCount(symbol);
}
return total;
}
*/
final String name = scheme.getSchemeName();
String label = MessageManager.getStringOrReturn(
- "label.colourScheme_" + name.toLowerCase().replace(" ", "_"),
- name);
+ "label.colourScheme_", name.toLowerCase().replace(" ", "_"));
final JRadioButtonMenuItem radioItem = new JRadioButtonMenuItem(
label);
radioItem.setName(name);
{
if (af != null)
{
+ af.buildColourMenu(); // enable HMMER colour schemes
af.setProgressBar("", msgID);
}
}
import java.awt.Color;
import java.util.HashMap;
-import java.util.List;
import java.util.Map;
+/**
+ * A colour scheme based on a selected Hidden Markov Model. The colour is
+ * <ul>
+ * <li>white for a gap</li>
+ * <li>red for an insertion</li>
+ * <li>orange for negative information content</li>
+ * <li>white to blue for increasing information content</li>
+ * </ul>
+ * where information content is the log ratio
+ *
+ * <pre>
+ * log(profile match emission probability / residue background probability)
+ * </pre>
+ *
+ * using the alignment's background frequencies for residues.
+ *
+ * @author tzvanaalten
+ *
+ */
public class HMMERAlignmentColourScheme extends ResidueColourScheme
{
- Map<Character, Double> backgroundFrequencies = new HashMap<>();
+ /*
+ * the ratio, for each symbol, of its frequency to total symbol count
+ */
+ Map<Character, Double> frequency = new HashMap<>();
- Double maxLLR;
+ float logTotalCount;
HiddenMarkovModel hmm;
- boolean peptideSpecific;
-
- boolean nucleotideSpecific;
-
- public HMMERAlignmentColourScheme(HiddenMarkovModel markov)
+ /**
+ * Constructor given a Hidden Markov Model
+ *
+ * @param sg
+ *
+ * @param markov
+ */
+ public HMMERAlignmentColourScheme(AnnotatedCollectionI sg,
+ HiddenMarkovModel markov)
{
hmm = markov;
+ countFrequencies(sg);
}
+ /**
+ * Default constructor (required by ColourSchemes.loadColourSchemes)
+ */
public HMMERAlignmentColourScheme()
{
-
}
@Override
public Color findColour(char symbol, int position, SequenceI seq,
String consensusResidue, float pid)
{
- if (hmm == null)
- {
- return Color.white;
- }
return findColour(symbol, position);
}
/**
- * Returns the colour at a particular symbol at a column in the alignment.
+ * Returns the colour at a particular symbol at a column in the alignment:
+ * <ul>
+ * <li>white for a gap</li>
+ * <li>red for an insertion</li>
+ * <li>orange for negative information content</li>
+ * <li>white to blue for increasing information content</li>
+ * </ul>
*
* @param symbol
- * @param position
- * @return Red for an insertion, white for a gap, orange for a negative
- * information content, white to blue for increasing information
- * content.
+ * @param column
+ * @return
*/
- private Color findColour(char symbol, int position)
+ private Color findColour(char symbol, int column)
{
-
- if (Comparison.isGap(symbol))
+ if (hmm == null || Comparison.isGap(symbol))
{
return Color.white;
}
{
symbol = Character.toUpperCase(symbol);
}
- Double prob;
- prob = hmm.getMatchEmissionProbability(position, symbol);
- Double freq = backgroundFrequencies.get(symbol);
+ double prob = hmm.getMatchEmissionProbability(column, symbol);
+ Double freq = frequency.get(symbol);
if (freq == null)
{
return Color.white;
{
return new Color(230, 0, 0);
}
- Double value = Math.log(prob / freq);
+ double value = Math.log(prob / freq.doubleValue());
Color colour = null;
if (value > 0)
{
-
- colour = ColorUtils.getGraduatedColour(value.floatValue(), 0,
- Color.WHITE, maxLLR.floatValue(), Color.blue);
+ colour = ColorUtils.getGraduatedColour((float) value, 0,
+ Color.WHITE, logTotalCount, Color.blue);
}
else if (value < 0)
{
return Color.ORANGE;
-
}
return colour;
}
public void alignmentChanged(AnnotatedCollectionI collection,
Map<SequenceI, SequenceCollectionI> hiddenReps)
{
- List<SequenceI> seqs = collection.getSequences();
- for (SequenceI seq : seqs)
- {
- if (seq.isHMMConsensusSequence())
- {
- hmm = seq.getHMM();
- break;
- }
- }
-
- count(collection);
-
+ /*
+ * ? no need to do anything if alignment is adjusted
+ * since findColour() handles everything
+ */
}
+ /**
+ * Answers a new colour scheme instance for the given collection, using the
+ * HMM of the first HMM consensus sequence in sg. If sg has no HMM consensus
+ * sequence, the HMM (possibly null) held by this instance is used instead.
+ */
@Override
public ColourSchemeI getInstance(AnnotatedCollectionI sg,
Map<SequenceI, SequenceCollectionI> hiddenRepSequences)
{
- HiddenMarkovModel markov = null;
- List<SequenceI> seqs = sg.getSequences();
- for (SequenceI seq : seqs)
- {
- if (seq.getHMM() != null)
- {
- markov = seq.getHMM();
- break;
- }
- }
-
- count(sg);
-
- HMMERAlignmentColourScheme colour = new HMMERAlignmentColourScheme(
- markov);
- return colour;
-
+ /*
+ * an instance made with the default constructor has no HMM, so look
+ * the model up in sg rather than relying on this instance's field
+ */
+ HiddenMarkovModel model = hmm;
+ if (!sg.getHMMConsensusSequences().isEmpty())
+ {
+ model = sg.getHMMConsensusSequences().get(0).getHMM();
+ }
+ return new HMMERAlignmentColourScheme(sg, model);
}
@Override
public boolean isApplicableTo(AnnotatedCollectionI ac)
{
- return true;
-
+ return !ac.getHMMConsensusSequences().isEmpty();
}
@Override
public String getSchemeName()
{
-
- return JalviewColourScheme.HMMERA.name();
+ return JalviewColourScheme.HMMERA.toString();
}
@Override
return false;
}
- public void count(AnnotatedCollectionI sg)
+ /**
+ * Counts and stores the relative frequency of every residue in the
+ * alignment
+ *
+ * @param sg
+ */
+ public void countFrequencies(AnnotatedCollectionI sg)
{
- ResidueCount counts = new ResidueCount();
- for (SequenceI seq : sg.getSequences())
- {
- for (int i = 0; i < seq.getLength(); i++)
- {
- if (!Comparison.isGap(seq.getCharAt(i)))
- {
- counts.add(seq.getCharAt(i));
- }
- }
- }
-
- int total = counts.getTotalCount();
+ ResidueCount counts = new ResidueCount(sg.getSequences());
+ int total = counts.getTotalCount(); // excludes gaps
for (char symbol : counts.getSymbolCounts().symbols)
{
- double count = Double.valueOf(counts.getCount(symbol))
- / Double.valueOf(total);
- backgroundFrequencies.put(symbol, count);
+ double freq = counts.getCount(symbol) / (double) total;
+ frequency.put(symbol, freq);
}
- maxLLR = Math.log(total);
+ logTotalCount = (float) Math.log(total);
}
}
-
import java.util.List;
import java.util.Map;
+/**
+ * A colour scheme based on a selected Hidden Markov Model. The colour is
+ * <ul>
+ * <li>white for a gap</li>
+ * <li>red for an insertion</li>
+ * <li>orange for negative information content</li>
+ * <li>white to blue for increasing information content</li>
+ * </ul>
+ * where information content is the log ratio
+ *
+ * <pre>
+ * log(profile match emission probability / residue background probability)
+ * </pre>
+ *
+ * using global ('Uniprot') background frequencies for residues.
+ *
+ * @author tzvanaalten
+ *
+ */
public class HMMERColourScheme extends ResidueColourScheme
{
+ /*
+ * The highest possible log ratio is when match emission probability in
+ * the HMM model is 1, and background (for W) is 0.0109 giving
+ * log(1/0.0109) = log(91.743) = 4.519
+ */
+ private static final float MAX_LOG_RATIO = 4.519f;
- AnnotatedCollectionI alignment;
+ private static final Color REDDISH = new Color(230, 0, 0);
HiddenMarkovModel hmm;
-
- boolean peptideSpecific;
-
- boolean nucleotideSpecific;
+ /**
+ * Constructor given a Hidden Markov Model
+ *
+ * @param markov
+ */
public HMMERColourScheme(HiddenMarkovModel markov)
{
hmm = markov;
}
+
+ /**
+ * Default constructor (required by ColourSchemes.loadColourSchemes)
+ */
public HMMERColourScheme()
{
-
}
-
@Override
public Color findColour(char symbol, int position, SequenceI seq,
String consensusResidue, float pid)
{
- if (hmm ==null)
- {
- return Color.white;
- }
return findColour(symbol, position);
}
/**
- * Returns the colour at a particular symbol at a column in the alignment.
+ * Returns the colour at a particular symbol at a column in the alignment:
+ * <ul>
+ * <li>white for a gap</li>
+ * <li>red for an insertion</li>
+ * <li>orange for negative information content</li>
+ * <li>white to blue for increasing information content</li>
+ * </ul>
*
* @param symbol
- * @param position
- * @return Red for an insertion, white for a gap, orange for a negative
- * information content, white to blue for increasing information
- * content.
+ * @param column
+ * @return
*/
- private Color findColour(char symbol, int position)
+ private Color findColour(char symbol, int column)
{
-
- if (Comparison.isGap(symbol))
+ if (hmm == null || Comparison.isGap(symbol))
{
return Color.white;
}
{
symbol = Character.toUpperCase(symbol);
}
- Double prob;
- prob = hmm.getMatchEmissionProbability(position, symbol);
- double freq = 0;
+
+ double prob = hmm.getMatchEmissionProbability(column, symbol);
+ Float freq = 0f;
String alpha = hmm.getAlphabetType();
- if (!ResidueProperties.backgroundFrequencies.get(alpha).containsKey(symbol))
+ if (!ResidueProperties.backgroundFrequencies.get(alpha)
+ .containsKey(symbol))
{
- return Color.white;
+ return Color.WHITE;
}
else
{
freq = ResidueProperties.backgroundFrequencies.get(alpha).get(symbol);
}
- if (prob == 0)
+ if (prob == 0D)
{
- return new Color(230, 0, 0);
+ return REDDISH;
}
- Double value = Math.log(prob / freq);
+ double value = Math.log(prob / freq.floatValue());
Color colour = null;
if (value > 0)
{
-
- colour = ColorUtils.getGraduatedColour(value.floatValue(), 0,
- Color.WHITE, 4.52f, Color.blue);
+ colour = ColorUtils.getGraduatedColour((float) value, 0,
+ Color.WHITE, MAX_LOG_RATIO, Color.blue);
}
else if (value < 0)
{
return Color.ORANGE;
-
}
return colour;
-
}
-
-
-
-
-
@Override
public void alignmentChanged(AnnotatedCollectionI collection,
Map<SequenceI, SequenceCollectionI> hiddenReps)
{
- List<SequenceI> seqs = collection.getSequences();
- for (SequenceI seq : seqs)
- {
- if (seq.isHMMConsensusSequence())
- {
- hmm = seq.getHMM();
- break;
- }
- }
-
+ /*
+ * ? no need to do anything if alignment is adjusted
+ * since findColour() handles everything
+ */
}
-
-
+ /**
+ * Answers a new colour scheme instance based on the HMM of the first sequence
+ * in sg that has an HMM
+ */
@Override
public ColourSchemeI getInstance(AnnotatedCollectionI sg,
Map<SequenceI, SequenceCollectionI> hiddenRepSequences)
{
- HiddenMarkovModel markov = null;
- List<SequenceI> seqs = sg.getSequences();
- for (SequenceI seq : seqs)
- {
- if (seq.isHMMConsensusSequence())
- {
- markov = seq.getHMM();
- break;
- }
- }
- HMMERColourScheme colour = new HMMERColourScheme(markov);
- return colour;
-
+ HiddenMarkovModel model = null;
+ List<SequenceI> seqs = sg.getHMMConsensusSequences();
+ if (!seqs.isEmpty())
+ {
+ model = seqs.get(0).getHMM();
+ }
+ HMMERColourScheme colour = new HMMERColourScheme(model);
+ return colour;
}
@Override
- public boolean isApplicableTo(AnnotatedCollectionI ac)
+ public String getSchemeName()
{
- return true;
-
+ return JalviewColourScheme.HMMERU.toString();
}
@Override
- public String getSchemeName()
+ public boolean isSimple()
{
-
- return JalviewColourScheme.HMMERU.name();
+ return false;
}
@Override
- public boolean isSimple()
+ public boolean isApplicableTo(AnnotatedCollectionI ac)
{
- return false;
+ return !ac.getHMMConsensusSequences().isEmpty();
}
}
import jalview.datamodel.ResidueCount.SymbolCounts;
import jalview.gui.JvOptionPane;
+import java.util.Arrays;
+
import org.junit.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
assertEquals(rc.getCount('?'), 6);
assertEquals(rc.getCount('!'), 7);
}
+
+ @Test(groups = "Functional")
+ public void testConstructor_forSequences()
+ {
+ SequenceI seq1 = new Sequence("seq1", "abcde--. FCD");
+ SequenceI seq2 = new Sequence("seq2", "ab.kKqBd-.");
+ ResidueCount rc = new ResidueCount(Arrays.asList(seq1, seq2));
+
+ assertEquals(rc.getGapCount(), 7);
+ assertEquals(rc.getTotalCount(), 15); // excludes gaps
+ assertEquals(rc.getCount('a'), 2);
+ assertEquals(rc.getCount('A'), 2);
+ assertEquals(rc.getCount('B'), 3);
+ assertEquals(rc.getCount('c'), 2);
+ assertEquals(rc.getCount('D'), 3);
+ assertEquals(rc.getCount('f'), 1);
+ assertEquals(rc.getCount('K'), 2);
+ assertEquals(rc.getCount('Q'), 1);
+ }
}
--- /dev/null
+package jalview.schemes;
+
+import static org.testng.Assert.assertEquals;
+
+import jalview.datamodel.Alignment;
+import jalview.datamodel.AnnotatedCollectionI;
+import jalview.datamodel.Sequence;
+import jalview.datamodel.SequenceI;
+import jalview.io.DataSourceType;
+import jalview.io.HMMFile;
+
+import java.awt.Color;
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import org.testng.annotations.Test;
+
+public class HMMERAlignmentColourSchemeTest {
+
+ @Test(groups = "Functional")
+ public void testFindColour() throws MalformedURLException, IOException
+ {
+ HMMFile file = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
+ DataSourceType.FILE);
+
+ /*
+ * alignment with 20 residues and background frequencies:
+ * S 3/20 = 0.15
+ * A/a, M, K 4/20 = 0.2
+ * V 2/20 = 0.1
+ * Q, R, L 1/20 = 0.05
+ * log(totalCount) = log(20) = 2.996
+ */
+ SequenceI seq1 = new Sequence("seq1", "AAMMMKKKVV");
+ SequenceI seq2 = new Sequence("seq2", "aAM-QKRSSSL");
+ AnnotatedCollectionI ac = new Alignment(new SequenceI[] { seq1, seq2 });
+ ColourSchemeI scheme = new HMMERAlignmentColourScheme(ac,
+ file.getHMM());
+
+ /*
+ * 'A' in column 1, node 2, match emission 2.77204
+ * e-2.77204 = 0.0625
+ * background frequency is 0.2
+ * ratio is < 1, log is negative, colour is Orange
+ */
+ Color actual = scheme.findColour('A', 1, null, null, 0);
+ assertEquals(actual, Color.ORANGE);
+
+ // gap is white
+ actual = scheme.findColour('-', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+ actual = scheme.findColour(' ', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+ actual = scheme.findColour('.', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+
+ /*
+ * 'L' in column 3, node 4, match emission 1.98342
+ * e-1.98342 = 0.1376
+ * background frequency is 0.05
+ * ratio is 2.752, log is 1.012
+ * colour is graduated 1.012/2.996 or 86/255 of the way from
+ * white(255, 255, 255) to blue(0, 0, 255)
+ */
+ actual = scheme.findColour('L', 3, null, null, 0);
+ assertEquals(actual, new Color(169, 169, 255));
+
+ /*
+ * invalid symbol is White
+ */
+ actual = scheme.findColour('X', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+ }
+
+}
import static org.testng.Assert.assertEquals;
import jalview.io.DataSourceType;
-import jalview.io.FileParse;
import jalview.io.HMMFile;
import java.awt.Color;
import org.testng.annotations.Test;
public class HMMERColourSchemeTest {
- HMMERColourScheme scheme = new HMMERColourScheme();
- @Test
+ @Test(groups = "Functional")
public void testFindColour() throws MalformedURLException, IOException
{
+ HMMFile file = new HMMFile("test/jalview/io/test_PKinase_hmm.txt",
+ DataSourceType.FILE);
- HMMFile file = new HMMFile(new FileParse(
- "test/jalview/io/test_PKinase_hmm.txt", DataSourceType.FILE));
-
- scheme.hmm = file.getHMM();
- Color expected;
+ ColourSchemeI scheme = new HMMERColourScheme(file.getHMM());
+ /*
+ * 'A' in column 1, node 2, match emission 2.77204
+ * e-2.77204 = 0.0625
+ * background frequency is 0.0826
+ * ratio is 0.757, log is negative, colour is Orange
+ */
Color actual = scheme.findColour('A', 1, null, null, 0);
assertEquals(actual, Color.ORANGE);
- actual = scheme.findColour('Y', 3, null, null, 0);
+ // gap is white
+ actual = scheme.findColour('-', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+ actual = scheme.findColour(' ', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+ actual = scheme.findColour('.', 2, null, null, 0);
+ assertEquals(actual, Color.WHITE);
+
+ /*
+ * 'Y' in column 4, node 5, match emission 4.41426
+ * e-4.41426 = 0.0121
+ * background frequency is 0.0292
+ * ratio is 0.414, log is negative, colour is Orange
+ */
+ actual = scheme.findColour('Y', 4, null, null, 0);
assertEquals(actual, Color.ORANGE);
+ /*
+ * 'M' in column 109, no matching node, colour is reddish
+ */
actual = scheme.findColour('M', 109, null, null, 0);
- expected = new Color(230, 0, 0);
- assertEquals(actual, expected);
-
+ assertEquals(actual, new Color(230, 0, 0));
+
+ /*
+ * 'I' in column 6, node 7, match emission 1.33015
+ * e-1.33015 = 0.2644
+ * background frequency is 0.0593
+ * ratio is 4.459, log is 1.495
+ * colour is graduated 1.495/4.52 or 84/255 of the way from
+ * white(255, 255, 255) to blue(0, 0, 255)
+ */
actual = scheme.findColour('I', 6, null, null, 0);
- expected = new Color(171, 171, 255);
- assertEquals(actual, expected);
-
+ assertEquals(actual, new Color(171, 171, 255));
+
+ /*
+ * 'V' in column 14, node 15, match emission 0.44769
+ * e-0.44769 = 0.6391
+ * background frequency is 0.0686
+ * ratio is 9.316, log is 2.232
+ * colour is graduated 2.232/4.52 or 126/255 of the way from
+ * white(255, 255, 255) to blue(0, 0, 255)
+ */
actual = scheme.findColour('V', 14, null, null, 0);
- expected = new Color(129, 129, 255);
- assertEquals(actual, expected);
+ assertEquals(actual, new Color(129, 129, 255));
- actual = scheme.findColour('X', 100000000, null, null, 0);
+ /*
+ * invalid symbol is White
+ */
+ actual = scheme.findColour('X', 2, null, null, 0);
assertEquals(actual, Color.WHITE);
-
-
}
}