/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.analysis;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

import jalview.datamodel.Sequence;
import jalview.datamodel.SequenceI;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.testng.annotations.Test;
36 public class ConservationTest
38 @Test(groups = "Functional")
39 public void testRecordConservation()
41 Map<String, Integer> resultMap = new HashMap<String, Integer>();
43 // V is hydrophobic, aliphatic, small
44 Conservation.recordConservation(resultMap, "V");
45 assertEquals(resultMap.get("hydrophobic").intValue(), 1);
46 assertEquals(resultMap.get("aliphatic").intValue(), 1);
47 assertEquals(resultMap.get("small").intValue(), 1);
48 assertEquals(resultMap.get("tiny").intValue(), 0);
49 assertEquals(resultMap.get("polar").intValue(), 0);
50 assertEquals(resultMap.get("charged").intValue(), 0);
52 // now add S: not hydrophobic, small, tiny, polar, not aliphatic
53 Conservation.recordConservation(resultMap, "s");
54 assertEquals(resultMap.get("hydrophobic").intValue(), -1);
55 assertEquals(resultMap.get("aliphatic").intValue(), -1);
56 assertEquals(resultMap.get("small").intValue(), 1);
57 assertEquals(resultMap.get("tiny").intValue(), -1);
58 assertEquals(resultMap.get("polar").intValue(), -1);
59 assertEquals(resultMap.get("charged").intValue(), 0);
62 @Test(groups = "Functional")
63 public void testCountConservationAndGaps()
65 List<SequenceI> seqs = new ArrayList<SequenceI>();
66 seqs.add(new Sequence("seq1", "VGnY")); // not case sensitive
67 seqs.add(new Sequence("seq2", "-G-y"));
68 seqs.add(new Sequence("seq3", "VG-Y"));
69 seqs.add(new Sequence("seq4", "VGNW"));
71 Conservation cons = new Conservation("", seqs, 0, 50);
72 int[] counts = cons.countConservationAndGaps(0);
73 assertEquals(counts[0], 1); // conserved
74 assertEquals(counts[1], 1); // gap count
75 counts = cons.countConservationAndGaps(1);
76 assertEquals(counts[0], 1);
77 assertEquals(counts[1], 0);
78 counts = cons.countConservationAndGaps(2);
79 assertEquals(counts[0], 1);
80 assertEquals(counts[1], 2);
81 counts = cons.countConservationAndGaps(3);
82 assertEquals(counts[0], 0); // not conserved
83 assertEquals(counts[1], 0);
86 @Test(groups = "Functional")
87 public void testCalculate_noThreshold()
89 List<SequenceI> seqs = new ArrayList<SequenceI>();
90 seqs.add(new Sequence("seq1", "VGIV-N"));
91 seqs.add(new Sequence("seq2", "V-iL-N")); // not case sensitive
92 seqs.add(new Sequence("seq3", "V-IW-N"));
93 seqs.add(new Sequence("seq4", "VGLH-L"));
95 Conservation cons = new Conservation("", 0, seqs, 0, 5);
99 * column 0: all V (hydrophobic/aliphatic/small)
101 Map<String, Integer> colCons = cons.total[0];
102 assertEquals(colCons.get("hydrophobic").intValue(), 1);
103 assertEquals(colCons.get("aliphatic").intValue(), 1);
104 assertEquals(colCons.get("small").intValue(), 1);
105 assertEquals(colCons.get("tiny").intValue(), 0);
106 assertEquals(colCons.get("proline").intValue(), 0);
107 assertEquals(colCons.get("charged").intValue(), 0);
108 assertEquals(colCons.get("negative").intValue(), 0);
109 assertEquals(colCons.get("polar").intValue(), 0);
110 assertEquals(colCons.get("positive").intValue(), 0);
111 assertEquals(colCons.get("aromatic").intValue(), 0);
114 * column 1: all G (hydrophobic/small/tiny)
115 * gaps take default value of property present
117 colCons = cons.total[1];
118 assertEquals(colCons.get("hydrophobic").intValue(), 1);
119 assertEquals(colCons.get("aliphatic").intValue(), -1);
120 assertEquals(colCons.get("small").intValue(), 1);
121 assertEquals(colCons.get("tiny").intValue(), 1);
122 assertEquals(colCons.get("proline").intValue(), -1);
123 assertEquals(colCons.get("charged").intValue(), -1);
124 assertEquals(colCons.get("negative").intValue(), -1);
125 assertEquals(colCons.get("polar").intValue(), -1);
126 assertEquals(colCons.get("positive").intValue(), -1);
127 assertEquals(colCons.get("aromatic").intValue(), -1);
130 * column 2: I/L (aliphatic/hydrophobic), all others negatively conserved
132 colCons = cons.total[2];
133 assertEquals(colCons.get("hydrophobic").intValue(), 1);
134 assertEquals(colCons.get("aliphatic").intValue(), 1);
135 assertEquals(colCons.get("small").intValue(), 0);
136 assertEquals(colCons.get("tiny").intValue(), 0);
137 assertEquals(colCons.get("proline").intValue(), 0);
138 assertEquals(colCons.get("charged").intValue(), 0);
139 assertEquals(colCons.get("negative").intValue(), 0);
140 assertEquals(colCons.get("polar").intValue(), 0);
141 assertEquals(colCons.get("positive").intValue(), 0);
142 assertEquals(colCons.get("aromatic").intValue(), 0);
145 * column 3: VLWH all hydrophobic, none is tiny, negative or proline
147 colCons = cons.total[3];
148 assertEquals(colCons.get("hydrophobic").intValue(), 1);
149 assertEquals(colCons.get("aliphatic").intValue(), -1);
150 assertEquals(colCons.get("small").intValue(), -1);
151 assertEquals(colCons.get("tiny").intValue(), 0);
152 assertEquals(colCons.get("proline").intValue(), 0);
153 assertEquals(colCons.get("charged").intValue(), -1);
154 assertEquals(colCons.get("negative").intValue(), 0);
155 assertEquals(colCons.get("polar").intValue(), -1);
156 assertEquals(colCons.get("positive").intValue(), -1);
157 assertEquals(colCons.get("aromatic").intValue(), -1);
160 * column 4: all gaps - counted as having all properties
162 colCons = cons.total[4];
163 assertEquals(colCons.get("hydrophobic").intValue(), 1);
164 assertEquals(colCons.get("aliphatic").intValue(), 1);
165 assertEquals(colCons.get("small").intValue(), 1);
166 assertEquals(colCons.get("tiny").intValue(), 1);
167 assertEquals(colCons.get("proline").intValue(), 1);
168 assertEquals(colCons.get("charged").intValue(), 1);
169 assertEquals(colCons.get("negative").intValue(), 1);
170 assertEquals(colCons.get("polar").intValue(), 1);
171 assertEquals(colCons.get("positive").intValue(), 1);
172 assertEquals(colCons.get("aromatic").intValue(), 1);
175 * column 5: N (small polar) and L (aliphatic hydrophobic)
176 * have nothing in common!
178 colCons = cons.total[5];
179 assertEquals(colCons.get("hydrophobic").intValue(), -1);
180 assertEquals(colCons.get("aliphatic").intValue(), -1);
181 assertEquals(colCons.get("small").intValue(), -1);
182 assertEquals(colCons.get("tiny").intValue(), 0);
183 assertEquals(colCons.get("proline").intValue(), 0);
184 assertEquals(colCons.get("charged").intValue(), 0);
185 assertEquals(colCons.get("negative").intValue(), 0);
186 assertEquals(colCons.get("polar").intValue(), -1);
187 assertEquals(colCons.get("positive").intValue(), 0);
188 assertEquals(colCons.get("aromatic").intValue(), 0);
192 * Test for the case whether the number of non-gapped sequences in a column
193 * has to be above a threshold
195 @Test(groups = "Functional")
196 public void testCalculate_threshold()
198 List<SequenceI> seqs = new ArrayList<SequenceI>();
199 seqs.add(new Sequence("seq1", "VGIV-"));
200 seqs.add(new Sequence("seq2", "V-iL-")); // not case sensitive
201 seqs.add(new Sequence("seq3", "V-IW-"));
202 seqs.add(new Sequence("seq4", "VGLH-"));
203 seqs.add(new Sequence("seq5", "VGLH-"));
206 * threshold 50% means a residue has to occur 3 or more times
207 * in a column to be counted for conservation
209 // TODO: ConservationThread uses a value of 3
210 // calculateConservation states it is the minimum number of sequences
211 // but it is treated as percentage threshold in calculate() ?
212 Conservation cons = new Conservation("", 50, seqs, 0, 4);
216 * column 0: all V (hydrophobic/aliphatic/small)
218 Map<String, Integer> colCons = cons.total[0];
219 assertEquals(colCons.get("hydrophobic").intValue(), 1);
220 assertEquals(colCons.get("aliphatic").intValue(), 1);
221 assertEquals(colCons.get("small").intValue(), 1);
222 assertEquals(colCons.get("tiny").intValue(), 0);
223 assertEquals(colCons.get("proline").intValue(), 0);
224 assertEquals(colCons.get("charged").intValue(), 0);
225 assertEquals(colCons.get("negative").intValue(), 0);
226 assertEquals(colCons.get("polar").intValue(), 0);
227 assertEquals(colCons.get("positive").intValue(), 0);
228 assertEquals(colCons.get("aromatic").intValue(), 0);
231 * column 1: all G (hydrophobic/small/tiny)
232 * gaps are ignored as not above threshold
234 colCons = cons.total[1];
235 assertEquals(colCons.get("hydrophobic").intValue(), 1);
236 assertEquals(colCons.get("aliphatic").intValue(), 0);
237 assertEquals(colCons.get("small").intValue(), 1);
238 assertEquals(colCons.get("tiny").intValue(), 1);
239 assertEquals(colCons.get("proline").intValue(), 0);
240 assertEquals(colCons.get("charged").intValue(), 0);
241 assertEquals(colCons.get("negative").intValue(), 0);
242 assertEquals(colCons.get("polar").intValue(), 0);
243 assertEquals(colCons.get("positive").intValue(), 0);
244 assertEquals(colCons.get("aromatic").intValue(), 0);
247 * column 2: I/L (aliphatic/hydrophobic), all others negatively conserved
249 colCons = cons.total[2];
250 assertEquals(colCons.get("hydrophobic").intValue(), 1);
251 assertEquals(colCons.get("aliphatic").intValue(), 1);
252 assertEquals(colCons.get("small").intValue(), 0);
253 assertEquals(colCons.get("tiny").intValue(), 0);
254 assertEquals(colCons.get("proline").intValue(), 0);
255 assertEquals(colCons.get("charged").intValue(), 0);
256 assertEquals(colCons.get("negative").intValue(), 0);
257 assertEquals(colCons.get("polar").intValue(), 0);
258 assertEquals(colCons.get("positive").intValue(), 0);
259 assertEquals(colCons.get("aromatic").intValue(), 0);
262 * column 3: nothing above threshold
264 colCons = cons.total[3];
265 assertTrue(colCons.isEmpty());
268 * column 4: all gaps - counted as having all properties
270 colCons = cons.total[4];
271 assertEquals(colCons.get("hydrophobic").intValue(), 1);
272 assertEquals(colCons.get("aliphatic").intValue(), 1);
273 assertEquals(colCons.get("small").intValue(), 1);
274 assertEquals(colCons.get("tiny").intValue(), 1);
275 assertEquals(colCons.get("proline").intValue(), 1);
276 assertEquals(colCons.get("charged").intValue(), 1);
277 assertEquals(colCons.get("negative").intValue(), 1);
278 assertEquals(colCons.get("polar").intValue(), 1);
279 assertEquals(colCons.get("positive").intValue(), 1);
280 assertEquals(colCons.get("aromatic").intValue(), 1);
284 * Test the method that derives the conservation 'sequence' and the mouseover
285 * tooltips from the computed conservation
287 @Test(groups = "Functional")
288 public void testVerdict()
290 List<SequenceI> seqs = new ArrayList<SequenceI>();
291 seqs.add(new Sequence("seq1", "VGIVV-H"));
292 seqs.add(new Sequence("seq2", "VGILL-H"));
293 seqs.add(new Sequence("seq3", "VGIW--R"));
294 seqs.add(new Sequence("seq4", "VGLHH--"));
295 seqs.add(new Sequence("seq5", "VGLHH-R"));
296 seqs.add(new Sequence("seq6", "VGLHH--"));
297 seqs.add(new Sequence("seq7", "VGLHH-R"));
298 seqs.add(new Sequence("seq8", "VGLHH-R"));
300 // calculate with no threshold
301 Conservation cons = new Conservation("", 0, seqs, 0, 6);
303 // positive and negative conservation where <25% gaps in columns
304 cons.verdict(false, 25);
307 * verify conservation 'sequence'
308 * cols 0 fully conserved and above threshold (*)
309 * col 2 properties fully conserved (+)
310 * col 3 VLWH 1 positively and 3 negatively conserved properties
311 * col 4 has 1 positively conserved property, but because gap contributes a
312 * 'positive' for all properties, no negative conservation is counted
314 * col 6 has 25% gaps so fails threshold test
316 assertEquals(cons.getConsSequence().getSequenceAsString(), "**+41--");
319 * verify tooltips; conserved properties are sorted alphabetically within
320 * positive followed by negative
324 "aliphatic hydrophobic small !aromatic !charged !negative !polar !positive !proline !tiny");
327 "hydrophobic small tiny !aliphatic !aromatic !charged !negative !polar !positive !proline");
330 "aliphatic hydrophobic !aromatic !charged !negative !polar !positive !proline !small !tiny");
331 assertEquals(cons.getTooltip(3), "hydrophobic !negative !proline !tiny");
332 assertEquals(cons.getTooltip(4), "hydrophobic");
333 assertEquals(cons.getTooltip(5), "");
334 assertEquals(cons.getTooltip(6), "");