- SequenceI [] sequences;
- int start;
- int end;
- Vector seqNums; // vector of int vectors where first is sequence checksum
- int maxLength = 0; // used by quality calcs
- boolean seqNumsChanged = false; // updated after any change via calcSeqNum;
- Hashtable [] total;
-
- /** Stores calculated quality values */
- public Vector quality;
-
- /** Stores maximum and minimum values of quality values */
- public Double[] qualityRange = new Double[2];
- String consString = "";
- Sequence consSequence;
- Hashtable propHash;
- int threshold;
- String name = "";
- int[][] cons2;
-
- /**
- * Creates a new Conservation object.
- *
- * @param name Name of conservation
- * @param propHash DOCUMENT ME!
- * @param threshold to count the residues in residueHash(). commonly used value is 3
- * @param sequences sequences to be used in calculation
- * @param start start residue position
- * @param end end residue position
- */
- public Conservation(String name, Hashtable propHash, int threshold,
- Vector sequences, int start, int end)
+ /*
+ * need to have a minimum of 3% of sequences with a residue
+ * for it to be included in the conservation calculation
+ */
+ private static final int THRESHOLD_PERCENT = 3;
+
+ private static final int TOUPPERCASE = 'a' - 'A'; // numeric offset between lower and upper case ASCII letters
+
+ private static final int GAP_INDEX = -1; // NOTE(review): presumably the encoded index denoting a gap - confirm against usage
+
+ SequenceI[] sequences; // sequences to be used in the calculation
+
+ int start; // start column position of the calculation
+
+ int end; // end column position of the calculation
+
+ /*
+ * a list whose i'th element is an array whose first entry is the checksum
+ * of the i'th sequence, followed by residues encoded to score matrix index
+ */
+ Vector<int[]> seqNums;
+
+ int maxLength = 0; // used by quality calcs
+
+ boolean seqNumsChanged = false; // updated after any change via calcSeqNum
+
+ /*
+ * a map per column with {property, conservation} where conservation value is
+ * 1 (property is conserved), 0 (absence of property is conserved) or -1
+ * (property is not conserved i.e. column has residues with and without it)
+ */
+ Map<String, Integer>[] total;
+
+ /*
+ * if true then conservation calculation will map all symbols to canonical aa
+ * numbering rather than consider conservation of that symbol
+ */
+ boolean canonicaliseAa = true;
+
+ private Vector<Double> quality; // NOTE(review): presumably the calculated quality values - confirm
+
+ private double qualityMinimum; // NOTE(review): presumably the minimum of the quality values - confirm
+
+ private double qualityMaximum; // NOTE(review): presumably the maximum of the quality values - confirm
+
+ private Sequence consSequence; // NOTE(review): presumably the conservation result held as a sequence - confirm
+
+ /*
+ * percentage of residues in a column to qualify for counting conservation
+ */
+ private int threshold;
+
+ private String name = ""; // name of this conservation calculation
+
+ /*
+ * an array, for each column, of counts of symbols (by score matrix index)
+ */
+ private int[][] cons2;
+
+ /*
+ * gap counts for each column
+ */
+ private int[] cons2GapCounts;
+
+ private String[] consSymbs; // NOTE(review): purpose not visible in this chunk - presumably symbols per column; confirm
+
+ /**
+ * Constructor using the default threshold of 3% (THRESHOLD_PERCENT)
+ *
+ * @param name
+ * Name of conservation
+ * @param sequences
+ * sequences to be used in calculation
+ * @param start
+ * start column position
+ * @param end
+ * end column position
+ */
+ public Conservation(String name, List<SequenceI> sequences, int start,
+ int end)
+ {
+ this(name, THRESHOLD_PERCENT, sequences, start, end); // delegate to the full constructor with the default threshold
+ }
+
+ /**
+ * Constructor
+ *
+ * @param name
+ * Name of conservation
+ * @param threshold
+ * percentage of sequences at or below which property conservation is
+ * ignored
+ * @param sequences
+ * sequences to be used in calculation
+ * @param start
+ * start column position
+ * @param end
+ * end column position
+ */
+ public Conservation(String name, int threshold, List<SequenceI> sequences,
+ int start, int end)
+ {
+ this.name = name;
+ this.threshold = threshold;
+ this.start = start;
+ this.end = end;
+
+ maxLength = end - start + 1; // default width includes bounds of
+ // calculation
+
+ int s, sSize = sequences.size();
+ SequenceI[] sarray = new SequenceI[sSize];
+ this.sequences = sarray;
+ try