1 package jalview.analysis;
3 import static org.testng.AssertJUnit.assertEquals;
4 import static org.testng.AssertJUnit.assertNull;
6 import jalview.datamodel.Alignment;
7 import jalview.datamodel.AlignmentAnnotation;
8 import jalview.datamodel.Sequence;
9 import jalview.datamodel.SequenceI;
11 import java.util.List;
13 import org.testng.annotations.BeforeMethod;
14 import org.testng.annotations.Test;
16 public class ParsePropertiesTest
21 private ParseProperties pp;
24 * Construct an alignment with 4 sequences with varying description format
29 SequenceI[] seqs = new SequenceI[]
30 { new Sequence("sq1", "THISISAPLACEHOLDER"),
31 new Sequence("sq2", "THISISAPLACEHOLDER"),
32 new Sequence("sq3", "THISISAPLACEHOLDER"),
33 new Sequence("sq4", "THISISAPLACEHOLDER") };
34 seqs[0].setDescription("1 mydescription1");
35 seqs[1].setDescription("mydescription2");
36 seqs[2].setDescription("2. 0.1 mydescription+3");
37 seqs[3].setDescription("3 0.01 mydescription4");
38 al = new Alignment(seqs);
40 pp = new ParseProperties(al);
45 * Test with a description pattern that matches any string ending in one or
46 * more 'number characters' (0-9+.), i.e. greedily matches any trailing
47 * numeric part of the string
50 public void testGetScoresFromDescription()
52 String regex = ".*([-0-9.+]+)";
53 final int count = pp.getScoresFromDescription("my Score",
54 "my Score Description", regex, true);
55 System.out.println("Matched " + count + " for " + regex);
56 assertEquals(4, count);
59 * Verify values 1/2/3/4 have been parsed from sequence descriptions
61 AlignmentAnnotation[] anns = al.getSequenceAt(0).getAnnotation();
62 assertEquals(1, anns.length);
63 assertEquals(1d, anns[0].getScore(), 0.001d);
64 assertEquals("my Score Description", anns[0].description);
65 assertEquals("my Score", anns[0].label);
66 anns = al.getSequenceAt(1).getAnnotation();
67 assertEquals(1, anns.length);
68 assertEquals(2d, anns[0].getScore(), 0.001d);
69 assertEquals("my Score Description", anns[0].description);
70 assertEquals("my Score", anns[0].label);
71 anns = al.getSequenceAt(2).getAnnotation();
72 assertEquals(1, anns.length);
73 assertEquals(3d, anns[0].getScore(), 0.001d);
74 anns = al.getSequenceAt(3).getAnnotation();
75 assertEquals(1, anns.length);
76 assertEquals(4d, anns[0].getScore(), 0.001d);
80 * Test with a description pattern that matches any string (or none), followed
81 * by a 'number character' (0-9+.), followed by at least one separator
82 * character, followed by at least one 'number character', then any trailing
86 public void testGetScoresFromDescription_twoScores()
88 String regex = ".*([-0-9.+]+).+([-0-9.+]+).*";
89 final int count = pp.getScoresFromDescription("my Score",
90 "my Score Description", regex, true);
91 System.out.println("Matched " + count + " for " + regex);
92 assertEquals(3, count);
95 * Seq1 has two score values parsed out
97 AlignmentAnnotation[] anns = al.getSequenceAt(0).getAnnotation();
98 assertEquals(2, anns.length);
99 assertEquals(1d, anns[0].getScore(), 0.001d);
100 assertEquals("my Score Description", anns[0].description);
101 assertEquals("my Score", anns[0].label);
102 assertEquals(1d, anns[1].getScore(), 0.001d);
103 assertEquals("my Score Description (column 1)", anns[1].description);
104 assertEquals("my Score_1", anns[1].label);
107 * Seq2 has no score parsed out (is this right?)
109 assertNull(al.getSequenceAt(1).getAnnotation());
112 * Seq3 has two score values parsed out
114 // TODO parsed values (1.0 and 3.0) look wrong v description
115 // would expect 2.0 and 0.1
116 // undesired 'greedy' behaviour of regex?
117 anns = al.getSequenceAt(2).getAnnotation();
118 assertEquals(2, anns.length);
119 assertEquals(1d, anns[0].getScore(), 0.001d);
120 assertEquals("my Score Description", anns[0].description);
121 assertEquals("my Score", anns[0].label);
122 assertEquals(3d, anns[1].getScore(), 0.001d);
123 assertEquals("my Score Description (column 1)", anns[1].description);
124 assertEquals("my Score_1", anns[1].label);
127 * Seq3 has two score values parsed out
129 // TODO parsed values (1.0 and 4.0) look wrong v description
130 // would expect 3 and 0.01
131 anns = al.getSequenceAt(3).getAnnotation();
132 assertEquals(2, anns.length);
133 assertEquals(1d, anns[0].getScore(), 0.001d);
134 assertEquals("my Score Description", anns[0].description);
135 assertEquals("my Score", anns[0].label);
136 assertEquals(4d, anns[1].getScore(), 0.001d);
137 assertEquals("my Score Description (column 1)", anns[1].description);
138 assertEquals("my Score_1", anns[1].label);
142 * Test with a regex that looks for numbers separated by words - as currently
143 * used in Jalview (May 2015)
145 * @see AlignFrame.extractScores_actionPerformed
148 public void testGetScoresFromDescription_wordBoundaries()
150 String regex = "\\W*([-+eE0-9.]+)";
151 List<SequenceI> seqs = al.getSequences();
152 seqs.get(0).setDescription("Ferredoxin");
153 seqs.get(1).setDescription(" Ferredoxin-1, chloroplast precursor");
154 seqs.get(2).setDescription("GH28E30p");
155 seqs.get(3).setDescription("At1g10960/T19D16_12");
156 final int count = pp.getScoresFromDescription("description column",
157 "score in description column ", regex, true);
158 assertEquals(3, count);
161 * No score parsable from seq1 description
163 AlignmentAnnotation[] anns = al.getSequenceAt(0).getAnnotation();
167 * Seq2 description has a '1' in it
169 anns = al.getSequenceAt(1).getAnnotation();
170 assertEquals(1, anns.length);
171 assertEquals(1d, anns[0].getScore(), 0.001d);
174 * Seq3 description has '28E30' in it
176 * Note: 1.8E308 or larger would result in 'Infinity'
178 anns = al.getSequenceAt(2).getAnnotation();
179 assertEquals(1, anns.length);
180 assertEquals(2.8E31d, anns[0].getScore(), 0.001d);
183 * Seq4 description has several numbers in it
185 anns = al.getSequenceAt(3).getAnnotation();
186 assertEquals(5, anns.length);
187 assertEquals(1d, anns[0].getScore(), 0.001d);
188 assertEquals(10960d, anns[1].getScore(), 0.001d);
189 assertEquals(19d, anns[2].getScore(), 0.001d);
190 assertEquals(16d, anns[3].getScore(), 0.001d);
191 assertEquals(12d, anns[4].getScore(), 0.001d);