*/
package jalview.analysis;
-import jalview.analysis.ResidueCount.SymbolCounts;
import jalview.datamodel.AlignmentAnnotation;
import jalview.datamodel.Annotation;
+import jalview.datamodel.ResidueCount;
+import jalview.datamodel.ResidueCount.SymbolCounts;
import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;
import jalview.schemes.ResidueProperties;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.SortedMap;
import java.util.TreeMap;
import java.util.Vector;
/**
* Calculates conservation values for a given set of sequences
- *
- * @author $author$
- * @version $Revision$
*/
public class Conservation
{
+ /*
+ * need to have a minimum of 3% of sequences with a residue
+ * for it to be included in the conservation calculation
+ */
+ private static final int THRESHOLD_PERCENT = 3;
+
private static final int TOUPPERCASE = 'a' - 'A';
SequenceI[] sequences;
/*
* a map per column with {property, conservation} where conservation value is
- * 1 (property is conserved), 0 (property is negatively conserved) or -1
+ * 1 (property is conserved), 0 (absence of property is conserved) or -1
* (property is not conserved i.e. column has residues with and without it)
*/
Map<String, Integer>[] total;
private String[] consSymbs;
/**
- * Creates a new Conservation object.
+ * Constructor using default threshold of 3%
*
* @param name
* Name of conservation
- * @param threshold
- * to count the residues in residueHash(). commonly used value is 3
* @param sequences
* sequences to be used in calculation
* @param start
* start column position
* @param end
* end column position
*/
+ public Conservation(String name, List<SequenceI> sequences, int start,
+ int end)
+ {
+ this(name, THRESHOLD_PERCENT, sequences, start, end); // delegate to the full constructor, supplying the default threshold
+ }
+
+ /**
+ * Constructor
+ *
+ * @param name
+ * Name of conservation
+ * @param threshold
+ * percentage of sequences at or below which property conservation is
+ * ignored
+ * @param sequences
+ * sequences to be used in calculation
+ * @param start
+ * start column position
+ * @param end
+ * end column position
+ */
public Conservation(String name, int threshold,
List<SequenceI> sequences, int start, int end)
{
{
ResidueCount values = countResidues(column);
- // TODO is threshold a percentage or count value?
+ /*
+ * count (threshold percent of height) at or below which we ignore residues
+ */
int thresh = (threshold * height) / 100;
/*
* check observed residues in column and record whether each
- * physico-chemical property is conserved (+1), negatively conserved (0),
+ * physico-chemical property is conserved (+1), absence conserved (0),
* or not conserved (-1)
* Using TreeMap means properties are displayed in alphabetical order
*/
- Map<String, Integer> resultHash = new TreeMap<String, Integer>();
+ SortedMap<String, Integer> resultHash = new TreeMap<String, Integer>();
SymbolCounts symbolCounts = values.getSymbolCounts();
char[] symbols = symbolCounts.symbols;
int[] counts = symbolCounts.values;
if (result == -1)
{
/*
- * not conserved either positively or negatively
+ * not conserved (present or absent)
*/
continue;
}
if (result == 0 && !positiveOnly)
{
/*
- * negatively conserved property (all residues lack it)
+ * absence of property is conserved (all residues lack it)
*/
negatives.append(negatives.length() == 0 ? "" : " ");
negatives.append("!").append(type);
*
* @param name
* - name of conservation
- * @param threshold
- * - minimum number of conserved residues needed to indicate
- * conservation (typically 3)
* @param seqs
* @param start
* first column in calculation window
* @return Conservation object ready for use in visualization
*/
public static Conservation calculateConservation(String name,
- int threshold, List<SequenceI> seqs, int start, int end,
- boolean positiveOnly, int maxPercentGaps, boolean calcQuality)
+ List<SequenceI> seqs, int start, int end, boolean positiveOnly,
+ int maxPercentGaps, boolean calcQuality)
{
- Conservation cons = new Conservation(name, threshold, seqs, start, end);
+ Conservation cons = new Conservation(name, seqs, start, end);
cons.calculate();
cons.verdict(positiveOnly, maxPercentGaps);
/**
* Returns the computed tooltip (annotation description) for a given column.
* The tip is empty if the conservation score is zero, otherwise holds the
- * positively (and, optionally, negatively) conserved properties.
+ * conserved properties (and, optionally, properties whose absence is
+ * conserved).
*
* @param column
* @return