From 9f6812a0b52a027056dff1425d20f674d114e55c Mon Sep 17 00:00:00 2001 From: gmungoc Date: Tue, 1 Nov 2016 16:14:18 +0000 Subject: [PATCH] Conservation now using ResidueCount; tests added --- src/jalview/analysis/Conservation.java | 365 +++++++++++++++------------ test/jalview/analysis/ConservationTest.java | 316 +++++++++++++++++++++++ 2 files changed, 525 insertions(+), 156 deletions(-) create mode 100644 test/jalview/analysis/ConservationTest.java diff --git a/src/jalview/analysis/Conservation.java b/src/jalview/analysis/Conservation.java index 8127747..73a9dee 100755 --- a/src/jalview/analysis/Conservation.java +++ b/src/jalview/analysis/Conservation.java @@ -20,16 +20,18 @@ */ package jalview.analysis; +import jalview.analysis.ResidueCount.SymbolCounts; import jalview.datamodel.AlignmentAnnotation; import jalview.datamodel.Annotation; import jalview.datamodel.Sequence; import jalview.datamodel.SequenceI; -import jalview.ext.android.SparseIntArray; import jalview.schemes.ResidueProperties; +import jalview.util.Comparison; import java.awt.Color; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.TreeMap; import java.util.Vector; @@ -41,6 +43,8 @@ import java.util.Vector; */ public class Conservation { + private static final int TOUPPERCASE = 'a' - 'A'; + SequenceI[] sequences; int start; @@ -54,6 +58,11 @@ public class Conservation boolean seqNumsChanged = false; // updated after any change via calcSeqNum; + /* + * a map per column with {property, conservation} where conservation value is + * 1 (property is conserved), 0 (property is negatively conserved) or -1 + * (property is not conserved i.e. column has residues with and without it) + */ Map[] total; boolean canonicaliseAa = true; // if true then conservation calculation will @@ -70,6 +79,9 @@ public class Conservation private Sequence consSequence; + /* + * percentage of residues in a column to qualify for counting conservation + */ private int threshold; private String name = ""; @@ -189,154 +201,185 @@ public class Conservation */ public void calculate() { - int jSize = sequences.length; - // int[] values; // Replaces residueHash - SparseIntArray values = new SparseIntArray(); + int height = sequences.length; total = new Map[maxLength]; - for (int i = start; i <= end; i++) + for (int column = start; column <= end; column++) { - // values = new int[255]; - values.clear(); + ResidueCount values = countResidues(column); + + // TODO is threshold a percentage or count value? + int thresh = (threshold * height) / 100; - for (int j = 0; j < jSize; j++) + /* + * check observed residues in column and record whether each + * physico-chemical property is conserved (+1), negatively conserved (0), + * or not conserved (-1) + * Using TreeMap means properties are displayed in alphabetical order + */ + Map resultHash = new TreeMap(); + SymbolCounts symbolCounts = values.getSymbolCounts(); + char[] symbols = symbolCounts.symbols; + int[] counts = symbolCounts.values; + for (int j = 0; j < symbols.length; j++) { - if (sequences[j].getLength() > i) + char c = symbols[j]; + if (counts[j] > thresh) { - char c = sequences[j].getCharAt(i); - - if (canonicaliseAa) - { // lookup the base aa code symbol - c = (char) ResidueProperties.aaIndex[sequences[j].getCharAt(i)]; - if (c > 20) - { - c = '-'; - } - else - { - // recover canonical aa symbol - c = ResidueProperties.aa[c].charAt(0); - } - } - else - { - // original behaviour - operate on ascii symbols directly - // No need to check if its a '-' - if (c == '.' || c == ' ') - { - c = '-'; - } - - c = toUpperCase(c); - } - // values[c]++; - values.add(c, 1); + recordConservation(resultHash, String.valueOf(c)); + } + } + if (values.getGapCount() > thresh) + { + recordConservation(resultHash, "-"); + } + + if (total.length > 0) + { + total[column - start] = resultHash; + } + } + } + + /** + * Updates the conservation results for an observed residue + * + * @param resultMap + * a map of {property, conservation} where conservation value is +1 + * (all residues have the property), 0 (no residue has the property) + * or -1 (some do, some don't) + * @param res + */ + protected static void recordConservation(Map resultMap, + String res) + { + res = res.toUpperCase(); + for (Entry> property : ResidueProperties.propHash + .entrySet()) + { + String propertyName = property.getKey(); + Integer residuePropertyValue = property.getValue().get(res); + + if (!resultMap.containsKey(propertyName)) + { + /* + * first time we've seen this residue - note whether it has this property + */ + if (residuePropertyValue != null) + { + resultMap.put(propertyName, residuePropertyValue); } else { - // values['-']++; - values.add('-', 1); + /* + * unrecognised residue - use default value for property + */ + resultMap.put(propertyName, property.getValue().get("-")); } } + else + { + Integer currentResult = resultMap.get(propertyName); + if (currentResult.intValue() != -1 + && !currentResult.equals(residuePropertyValue)) + { + /* + * property is unconserved - residues seen both with and without it + */ + resultMap.put(propertyName, Integer.valueOf(-1)); + } + } + } + } - // What is the count threshold to count the residues in residueHash() - int thresh = (threshold * jSize) / 100; + /** + * Counts residues (upper-cased) and gaps in the given column + * + * @param column + * @return + */ + protected ResidueCount countResidues(int column) + { + ResidueCount values = new ResidueCount(false); - // loop over all the found residues - // Hashtable resultHash = new Hashtable(); - Map resultHash = new TreeMap(); - // for (char v = '-'; v < 'Z'; v++) - for (int key = 0; key < values.size(); key++) + for (int row = 0; row < sequences.length; row++) + { + if (sequences[row].getLength() > column) { - char v = (char) values.keyAt(key); - // if (values[v] > thresh) - if (values.valueAt(key) > thresh) + char c = sequences[row].getCharAt(column); + if (canonicaliseAa) { - String res = String.valueOf(v); - - // Now loop over the properties - for (String type : ResidueProperties.propHash.keySet()) - { - Map ht = ResidueProperties.propHash.get(type); - - // Have we ticked this before? - if (!resultHash.containsKey(type)) - { - if (ht.containsKey(res)) - { - resultHash.put(type, ht.get(res)); - } - else - { - resultHash.put(type, ht.get("-")); - } - } - else if (!resultHash.get(type).equals(ht.get(res))) - { - resultHash.put(type, new Integer(-1)); - } - } + int index = ResidueProperties.aaIndex[c]; + c = index > 20 ? '-' : ResidueProperties.aa[index].charAt(0); + } + else + { + c = toUpperCase(c); + } + if (Comparison.isGap(c)) + { + values.addGap(); + } + else + { + values.add(c); } } - - if (total.length > 0) + else { - total[i - start] = resultHash; + values.addGap(); } } + return values; } - /***************************************************************************** - * count conservation for the j'th column of the alignment + /** + * Counts conservation and gaps for a column of the alignment * - * @return { gap count, conserved residue count} + * @return { 1 if fully conserved, else 0, gap count } */ - public int[] countConsNGaps(int j) + public int[] countConservationAndGaps(int column) { - int count = 0; - int cons = 0; - int nres = 0; - int[] r = new int[2]; - char f = '$'; - int i, iSize = sequences.length; - char c; + int gapCount = 0; + boolean fullyConserved = true; + int iSize = sequences.length; - for (i = 0; i < iSize; i++) + if (iSize == 0) { - if (j >= sequences[i].getLength()) + return new int[] { 0, 0 }; + } + + char lastRes = '0'; + for (int i = 0; i < iSize; i++) + { + if (column >= sequences[i].getLength()) { - count++; + gapCount++; continue; } - c = sequences[i].getCharAt(j); // gaps do not have upper/lower case + char c = sequences[i].getCharAt(column); // gaps do not have upper/lower case - if (jalview.util.Comparison.isGap((c))) + if (Comparison.isGap((c))) { - count++; + gapCount++; } else { c = toUpperCase(c); - nres++; - - if (nres == 1) + if (lastRes == '0') { - f = c; - cons++; + lastRes = c; } - else if (f == c) + if (c != lastRes) { - cons++; + fullyConserved = false; } } } - r[0] = (nres == cons) ? 1 : 0; - r[1] = count; - + int[] r = new int[] { fullyConserved ? 1 : 0, gapCount }; return r; } @@ -351,7 +394,7 @@ public class Conservation { if ('a' <= c && c <= 'z') { - c -= (32); // 32 = 'a' - 'A' + c -= TOUPPERCASE; } return c; } @@ -359,14 +402,17 @@ public class Conservation /** * Calculates the conservation sequence * - * @param consflag - * if true, positive conservation; false calculates negative - * conservation - * @param percentageGaps - * commonly used value is 25 + * @param positiveOnly + * if true, calculate positive conservation; else calculate both + * positive and negative conservation + * @param maxPercentageGaps + * the percentage of gaps in a column, at or above which no + * conservation is asserted */ - public void verdict(boolean consflag, float percentageGaps) + public void verdict(boolean positiveOnly, float maxPercentageGaps) { + // TODO call this at the end of calculate(), should not be a public method + StringBuilder consString = new StringBuilder(end); // NOTE THIS SHOULD CHECK IF THE CONSEQUENCE ALREADY @@ -379,56 +425,43 @@ public class Conservation consSymbs = new String[end - start + 1]; for (int i = start; i <= end; i++) { - int[] gapcons = countConsNGaps(i); + int[] gapcons = countConservationAndGaps(i); + boolean fullyConserved = gapcons[0] == 1; int totGaps = gapcons[1]; - float pgaps = ((float) totGaps * 100) / sequences.length; - StringBuilder positives = new StringBuilder(64); - StringBuilder negatives = new StringBuilder(32); - // consSymbs[i - start] = ""; + float pgaps = (totGaps * 100f) / sequences.length; - if (percentageGaps > pgaps) + if (maxPercentageGaps > pgaps) { Map resultHash = total[i - start]; - // Now find the verdict int count = 0; + StringBuilder positives = new StringBuilder(64); + StringBuilder negatives = new StringBuilder(32); for (String type : resultHash.keySet()) { int result = resultHash.get(type).intValue(); - // Do we want to count +ve conservation or +ve and -ve cons.? - if (consflag) + if (result == -1) + { + /* + * not conserved either positively or negatively + */ + continue; + } + count++; + if (result == 1) { - if (result == 1) - { - // consSymbs[i - start] = type + " " + consSymbs[i - start]; - positives.append(positives.length() == 0 ? "" : " "); - positives.append(type); - count++; - } + /* + * positively conserved property (all residues have it) + */ + positives.append(positives.length() == 0 ? "" : " "); + positives.append(type); } - else + if (result == 0 && !positiveOnly) { - if (result != -1) - { - if (result == 0) - { - /* - * add negatively conserved properties on the end - */ - // consSymbs[i - start] = consSymbs[i - start] + " !" + type; - negatives.append(negatives.length() == 0 ? "" : " "); - negatives.append("!").append(type); - } - else - { - /* - * put positively conserved properties on the front - */ - // consSymbs[i - start] = type + " " + consSymbs[i - start]; - positives.append(positives.length() == 0 ? "" : " "); - positives.append(type); - } - count++; - } + /* + * negatively conserved property (all residues lack it) + */ + negatives.append(negatives.length() == 0 ? "" : " "); + negatives.append("!").append(type); } } if (negatives.length() > 0) @@ -443,7 +476,7 @@ public class Conservation } else { - consString.append((gapcons[0] == 1) ? "*" : "+"); + consString.append(fullyConserved ? "*" : "+"); } } else @@ -755,9 +788,10 @@ public class Conservation * first column in calculation window * @param end * last column in calculation window - * @param posOrNeg - * positive (true) or negative (false) conservation - * @param consPercGaps + * @param positiveOnly + * calculate positive (true) or positive and negative (false) + * conservation + * @param maxPercentGaps * percentage of gaps tolerated in column * @param calcQuality * flag indicating if alignment quality should be calculated @@ -765,11 +799,11 @@ public class Conservation */ public static Conservation calculateConservation(String name, int threshold, List seqs, int start, int end, - boolean posOrNeg, int consPercGaps, boolean calcQuality) + boolean positiveOnly, int maxPercentGaps, boolean calcQuality) { Conservation cons = new Conservation(name, threshold, seqs, start, end); cons.calculate(); - cons.verdict(posOrNeg, consPercGaps); + cons.verdict(positiveOnly, maxPercentGaps); if (calcQuality) { @@ -778,4 +812,23 @@ public class Conservation return cons; } + + /** + * Returns the computed tooltip (annotation description) for a given column. + * The tip is empty if the conservation score is zero, otherwise holds the + * positively (and, optionally, negatively) conserved properties. + * + * @param column + * @return + */ + String getTooltip(int column) + { + char[] sequence = getConsSequence().getSequence(); + char val = column < sequence.length ? sequence[column] : '-'; + boolean hasConservation = val != '-' && val != '0'; + int consp = column - start; + String tip = (hasConservation && consp > -1 && consp < consSymbs.length) ? consSymbs[consp] + : ""; + return tip; + } } diff --git a/test/jalview/analysis/ConservationTest.java b/test/jalview/analysis/ConservationTest.java new file mode 100644 index 0000000..c419687 --- /dev/null +++ b/test/jalview/analysis/ConservationTest.java @@ -0,0 +1,316 @@ +package jalview.analysis; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertTrue; + +import jalview.datamodel.Sequence; +import jalview.datamodel.SequenceI; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.testng.annotations.Test; + +public class ConservationTest +{ + @Test(groups = "Functional") + public void testRecordConservation() + { + Map resultMap = new HashMap(); + + // V is hydrophobic, aliphatic, small + Conservation.recordConservation(resultMap, "V"); + assertEquals(resultMap.get("hydrophobic").intValue(), 1); + assertEquals(resultMap.get("aliphatic").intValue(), 1); + assertEquals(resultMap.get("small").intValue(), 1); + assertEquals(resultMap.get("tiny").intValue(), 0); + assertEquals(resultMap.get("polar").intValue(), 0); + assertEquals(resultMap.get("charged").intValue(), 0); + + // now add S: not hydrophobic, small, tiny, polar, not aliphatic + Conservation.recordConservation(resultMap, "s"); + assertEquals(resultMap.get("hydrophobic").intValue(), -1); + assertEquals(resultMap.get("aliphatic").intValue(), -1); + assertEquals(resultMap.get("small").intValue(), 1); + assertEquals(resultMap.get("tiny").intValue(), -1); + assertEquals(resultMap.get("polar").intValue(), -1); + assertEquals(resultMap.get("charged").intValue(), 0); + } + + @Test(groups = "Functional") + public void testCountConservationAndGaps() + { + List seqs = new ArrayList(); + seqs.add(new Sequence("seq1", "VGnY")); // not case sensitive + seqs.add(new Sequence("seq2", "-G-y")); + seqs.add(new Sequence("seq3", "VG-Y")); + seqs.add(new Sequence("seq4", "VGNW")); + + Conservation cons = new Conservation("", 3, seqs, 0, 50); + int[] counts = cons.countConservationAndGaps(0); + assertEquals(counts[0], 1); // conserved + assertEquals(counts[1], 1); // gap count + counts = cons.countConservationAndGaps(1); + assertEquals(counts[0], 1); + assertEquals(counts[1], 0); + counts = cons.countConservationAndGaps(2); + assertEquals(counts[0], 1); + assertEquals(counts[1], 2); + counts = cons.countConservationAndGaps(3); + assertEquals(counts[0], 0); // not conserved + assertEquals(counts[1], 0); + } + + @Test(groups = "Functional") + public void testCalculate_noThreshold() + { + List seqs = new ArrayList(); + seqs.add(new Sequence("seq1", "VGIV-N")); + seqs.add(new Sequence("seq2", "V-iL-N")); // not case sensitive + seqs.add(new Sequence("seq3", "V-IW-N")); + seqs.add(new Sequence("seq4", "VGLH-L")); + + Conservation cons = new Conservation("", 0, seqs, 0, 5); + cons.calculate(); + + /* + * column 0: all V (hydrophobic/aliphatic/small) + */ + Map colCons = cons.total[0]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 1); + assertEquals(colCons.get("small").intValue(), 1); + assertEquals(colCons.get("tiny").intValue(), 0); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), 0); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), 0); + assertEquals(colCons.get("positive").intValue(), 0); + assertEquals(colCons.get("aromatic").intValue(), 0); + + /* + * column 1: all G (hydrophobic/small/tiny) + * gaps take default value of property present + */ + colCons = cons.total[1]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), -1); + assertEquals(colCons.get("small").intValue(), 1); + assertEquals(colCons.get("tiny").intValue(), 1); + assertEquals(colCons.get("proline").intValue(), -1); + assertEquals(colCons.get("charged").intValue(), -1); + assertEquals(colCons.get("negative").intValue(), -1); + assertEquals(colCons.get("polar").intValue(), -1); + assertEquals(colCons.get("positive").intValue(), -1); + assertEquals(colCons.get("aromatic").intValue(), -1); + + /* + * column 2: I/L (aliphatic/hydrophobic), all others negatively conserved + */ + colCons = cons.total[2]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 1); + assertEquals(colCons.get("small").intValue(), 0); + assertEquals(colCons.get("tiny").intValue(), 0); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), 0); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), 0); + assertEquals(colCons.get("positive").intValue(), 0); + assertEquals(colCons.get("aromatic").intValue(), 0); + + /* + * column 3: VLWH all hydrophobic, none is tiny, negative or proline + */ + colCons = cons.total[3]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), -1); + assertEquals(colCons.get("small").intValue(), -1); + assertEquals(colCons.get("tiny").intValue(), 0); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), -1); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), -1); + assertEquals(colCons.get("positive").intValue(), -1); + assertEquals(colCons.get("aromatic").intValue(), -1); + + /* + * column 4: all gaps - counted as having all properties + */ + colCons = cons.total[4]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 1); + assertEquals(colCons.get("small").intValue(), 1); + assertEquals(colCons.get("tiny").intValue(), 1); + assertEquals(colCons.get("proline").intValue(), 1); + assertEquals(colCons.get("charged").intValue(), 1); + assertEquals(colCons.get("negative").intValue(), 1); + assertEquals(colCons.get("polar").intValue(), 1); + assertEquals(colCons.get("positive").intValue(), 1); + assertEquals(colCons.get("aromatic").intValue(), 1); + + /* + * column 5: N (small polar) and L (aliphatic hydrophobic) + * have nothing in common! + */ + colCons = cons.total[5]; + assertEquals(colCons.get("hydrophobic").intValue(), -1); + assertEquals(colCons.get("aliphatic").intValue(), -1); + assertEquals(colCons.get("small").intValue(), -1); + assertEquals(colCons.get("tiny").intValue(), 0); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), 0); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), -1); + assertEquals(colCons.get("positive").intValue(), 0); + assertEquals(colCons.get("aromatic").intValue(), 0); + } + + /** + * Test for the case whether the number of non-gapped sequences in a column + * has to be above a threshold + */ + @Test(groups = "Functional") + public void testCalculate_threshold() + { + List seqs = new ArrayList(); + seqs.add(new Sequence("seq1", "VGIV-")); + seqs.add(new Sequence("seq2", "V-iL-")); // not case sensitive + seqs.add(new Sequence("seq3", "V-IW-")); + seqs.add(new Sequence("seq4", "VGLH-")); + seqs.add(new Sequence("seq5", "VGLH-")); + + /* + * threshold 50% means a residue has to occur 3 or more times + * in a column to be counted for conservation + */ + // TODO: ConservationThread uses a value of 3 + // calculateConservation states it is the minimum number of sequences + // but it is treated as percentage threshold in calculate() ? + Conservation cons = new Conservation("", 50, seqs, 0, 4); + cons.calculate(); + + /* + * column 0: all V (hydrophobic/aliphatic/small) + */ + Map colCons = cons.total[0]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 1); + assertEquals(colCons.get("small").intValue(), 1); + assertEquals(colCons.get("tiny").intValue(), 0); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), 0); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), 0); + assertEquals(colCons.get("positive").intValue(), 0); + assertEquals(colCons.get("aromatic").intValue(), 0); + + /* + * column 1: all G (hydrophobic/small/tiny) + * gaps are ignored as not above threshold + */ + colCons = cons.total[1]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 0); + assertEquals(colCons.get("small").intValue(), 1); + assertEquals(colCons.get("tiny").intValue(), 1); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), 0); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), 0); + assertEquals(colCons.get("positive").intValue(), 0); + assertEquals(colCons.get("aromatic").intValue(), 0); + + /* + * column 2: I/L (aliphatic/hydrophobic), all others negatively conserved + */ + colCons = cons.total[2]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 1); + assertEquals(colCons.get("small").intValue(), 0); + assertEquals(colCons.get("tiny").intValue(), 0); + assertEquals(colCons.get("proline").intValue(), 0); + assertEquals(colCons.get("charged").intValue(), 0); + assertEquals(colCons.get("negative").intValue(), 0); + assertEquals(colCons.get("polar").intValue(), 0); + assertEquals(colCons.get("positive").intValue(), 0); + assertEquals(colCons.get("aromatic").intValue(), 0); + + /* + * column 3: nothing above threshold + */ + colCons = cons.total[3]; + assertTrue(colCons.isEmpty()); + + /* + * column 4: all gaps - counted as having all properties + */ + colCons = cons.total[4]; + assertEquals(colCons.get("hydrophobic").intValue(), 1); + assertEquals(colCons.get("aliphatic").intValue(), 1); + assertEquals(colCons.get("small").intValue(), 1); + assertEquals(colCons.get("tiny").intValue(), 1); + assertEquals(colCons.get("proline").intValue(), 1); + assertEquals(colCons.get("charged").intValue(), 1); + assertEquals(colCons.get("negative").intValue(), 1); + assertEquals(colCons.get("polar").intValue(), 1); + assertEquals(colCons.get("positive").intValue(), 1); + assertEquals(colCons.get("aromatic").intValue(), 1); + } + + /** + * Test the method that derives the conservation 'sequence' and the mouseover + * tooltips from the computed conservation + */ + @Test(groups = "Functional") + public void testVerdict() + { + List seqs = new ArrayList(); + seqs.add(new Sequence("seq1", "VGIVV-H")); + seqs.add(new Sequence("seq2", "VGILL-H")); + seqs.add(new Sequence("seq3", "VGIW--R")); + seqs.add(new Sequence("seq4", "VGLHH--")); + seqs.add(new Sequence("seq5", "VGLHH-R")); + seqs.add(new Sequence("seq6", "VGLHH--")); + seqs.add(new Sequence("seq7", "VGLHH-R")); + seqs.add(new Sequence("seq8", "VGLHH-R")); + + // calculate with no threshold + Conservation cons = new Conservation("", 0, seqs, 0, 6); + cons.calculate(); + // positive and negative conservation where <25% gaps in columns + cons.verdict(false, 25); + + /* + * verify conservation 'sequence' + * cols 0 fully conserved and above threshold (*) + * col 2 properties fully conserved (+) + * col 3 VLWH 1 positively and 3 negatively conserved properties + * col 4 has 1 positively conserved property, but because gap contributes a + * 'positive' for all properties, no negative conservation is counted + * col 5 is all gaps + * col 6 has 25% gaps so fails threshold test + */ + assertEquals(cons.getConsSequence().getSequenceAsString(), "**+41--"); + + /* + * verify tooltips; conserved properties are sorted alphabetically within + * positive followed by negative + */ + assertEquals( + cons.getTooltip(0), + "aliphatic hydrophobic small !aromatic !charged !negative !polar !positive !proline !tiny"); + assertEquals( + cons.getTooltip(1), + "hydrophobic small tiny !aliphatic !aromatic !charged !negative !polar !positive !proline"); + assertEquals( + cons.getTooltip(2), + "aliphatic hydrophobic !aromatic !charged !negative !polar !positive !proline !small !tiny"); + assertEquals(cons.getTooltip(3), "hydrophobic !negative !proline !tiny"); + assertEquals(cons.getTooltip(4), "hydrophobic"); + assertEquals(cons.getTooltip(5), ""); + assertEquals(cons.getTooltip(6), ""); + } +} -- 1.7.10.2