2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import static org.testng.Assert.assertEquals;
24 import static org.testng.Assert.assertSame;
25 import static org.testng.Assert.assertTrue;
27 import jalview.bin.Cache;
28 import jalview.datamodel.Alignment;
29 import jalview.datamodel.AlignmentI;
30 import jalview.datamodel.ColumnSelection;
31 import jalview.datamodel.HiddenColumns;
32 import jalview.datamodel.SearchResultMatchI;
33 import jalview.datamodel.SearchResultsI;
34 import jalview.datamodel.Sequence;
35 import jalview.datamodel.SequenceGroup;
36 import jalview.gui.AlignFrame;
37 import jalview.gui.JvOptionPane;
38 import jalview.io.DataSourceType;
39 import jalview.io.FileLoader;
41 import java.util.List;
43 import org.testng.annotations.BeforeClass;
44 import org.testng.annotations.Test;
46 public class FinderTest
// One-time class setup, annotated to run regardless of the selected groups
// (alwaysRun = true). It switches JvOptionPane to non-interactive mode and
// registers CANCEL_OPTION as its mock response.
// NOTE(review): presumably this stops any dialog from blocking an unattended
// test run - confirm against JvOptionPane.
48 @BeforeClass(alwaysRun = true)
49 public void setUpJvOptionPane()
51 JvOptionPane.setInteractiveMode(false);
52 JvOptionPane.setMockResponse(JvOptionPane.CANCEL_OPTION);
// Frame returned by FileLoader when the pasted test alignment is loaded in the
// class-level setup.
55 private AlignFrame af;
// Alignment obtained from af's viewport; the fixture that the tests search.
57 private AlignmentI al;
// Class-level fixture setup for the "Functional" group: loads test properties,
// pastes a four-sequence alignment and stores the resulting frame and alignment
// in af and al.
// NOTE(review): the method signature line is not visible in this extract.
59 @BeforeClass(groups = "Functional")
// Load the test properties, then force PAD_GAPS to "false".
// NOTE(review): presumably so that the unequal sequence lengths below are not
// padded with gaps on load - confirm.
62 Cache.loadProperties("test/jalview/io/testProps.jvprops");
63 Cache.applicationProperties.setProperty("PAD_GAPS",
64 Boolean.FALSE.toString());
// Four sequences of different lengths with mixed-case residues and gaps; the
// first line also carries a "/8-18" suffix after the name.
// NOTE(review): the suffix presumably sets seq1's start/end residue numbers;
// the expectations of positions 16 and 18 for 'I' in seq1 are consistent with
// that - confirm against the paste-format parser.
66 String seqData = "seq1seq1/8-18 ABCD--EF-GHIJI\n" + "seq2 A--BCDefHI\n"
67 + "seq3 --bcdEFH\n" + "seq4 aa---aMMMMMaaa\n";
// Load the text as pasted data and keep the viewport's alignment for the tests.
68 af = new FileLoader().LoadFileWaitTillLoaded(seqData,
69 DataSourceType.PASTE);
70 al = af.getViewport().getAlignment();
74 * Test for finding matches of a regular expression
// Searches the shared alignment for the regular expression "E.H" and checks the
// sequence, start and end of each reported match: first a single match, then
// two matches from a fresh Finder.
76 @Test(groups = "Functional")
77 public void testFind_regex()
80 * find next match only
82 Finder f = new Finder(al, null);
83 f.find("E.H"); // 'E, any character, H'
84 // should match seq2 efH only
// The expected hit contains lower-case 'e' and 'f', so this search is expected
// to match without regard to case.
85 SearchResultsI sr = f.getSearchResults();
86 assertEquals(sr.getSize(), 1);
87 List<SearchResultMatchI> matches = sr.getResults();
// al.getSequenceAt(1) is the second sequence of the fixture (seq2)
88 assertSame(matches.get(0).getSequence(), al.getSequenceAt(1));
89 assertEquals(matches.get(0).getStart(), 5);
90 assertEquals(matches.get(0).getEnd(), 7);
// NOTE(review): the same pattern on a fresh Finder is expected to yield two
// matches below, so a step between construction and find() (presumably
// switching the Finder to find-all) appears to be missing from this extract -
// confirm against the original file.
92 f = new Finder(al, null);
94 f.find("E.H"); // 'E, any character, H'
95 // should match seq2 efH and seq3 EFH
96 sr = f.getSearchResults();
97 assertEquals(sr.getSize(), 2);
98 matches = sr.getResults();
// results are expected in alignment order: seq2 first, then seq3
99 assertSame(matches.get(0).getSequence(), al.getSequenceAt(1));
100 assertSame(matches.get(1).getSequence(), al.getSequenceAt(2));
101 assertEquals(matches.get(0).getStart(), 5);
102 assertEquals(matches.get(0).getEnd(), 7);
103 assertEquals(matches.get(1).getStart(), 4);
104 assertEquals(matches.get(1).getEnd(), 6);
108 * Test for (undocumented) find residue by position
// Checks searching by residue position: the expected hits are single-residue
// ranges at position 9, first in one sequence and then in two, followed by a
// case that must produce no results.
// NOTE(review): none of the find(...) calls for this test are visible in this
// extract, so the search strings used cannot be stated here.
110 @Test(groups = "Functional")
111 public void testFind_residueNumber()
113 Finder f = new Finder(al, null);
116 * find first match should return seq1 residue 9
// NOTE(review): the find(...) call producing these results is not visible.
119 SearchResultsI sr = f.getSearchResults();
120 assertEquals(sr.getSize(), 1);
121 List<SearchResultMatchI> matches = sr.getResults();
// start == end == 9: the match is exactly one residue long
122 assertSame(matches.get(0).getSequence(), al.getSequenceAt(0));
123 assertEquals(matches.get(0).getStart(), 9);
124 assertEquals(matches.get(0).getEnd(), 9);
127 * find all matches should return seq1 and seq4 (others are too short)
129 f = new Finder(al, null);
// NOTE(review): the find-all setup and the find(...) call are not visible.
132 sr = f.getSearchResults();
133 assertEquals(sr.getSize(), 2);
134 matches = sr.getResults();
// indices 0 and 3 are the first and fourth fixture sequences (seq1, seq4)
135 assertSame(matches.get(0).getSequence(), al.getSequenceAt(0));
136 assertSame(matches.get(1).getSequence(), al.getSequenceAt(3));
137 assertEquals(matches.get(0).getStart(), 9);
138 assertEquals(matches.get(0).getEnd(), 9);
139 assertEquals(matches.get(1).getStart(), 9);
140 assertEquals(matches.get(1).getEnd(), 9);
143 * parsing of search string as integer is strict
145 f = new Finder(al, null);
// NOTE(review): the search call for this case is not visible; the assertion
// only requires that no residue matches were recorded.
147 assertTrue(f.getSearchResults().isEmpty());
151 * Test for find next action
// Exercises Finders constructed with an explicit start position (sequence index
// 1 plus a column index) and checks the id matches, the residue matches and the
// sequence/column indices the Finder reports after each search.
153 @Test(groups = "Functional")
154 public void testFindNext()
157 * start at second sequence; colIndex of -1
158 * means sequence id / description is searched
160 Finder f = new Finder(al, null, 1, -1);
161 f.find("e"); // matches id
// only an id match on the second sequence is expected; no residue matches
163 assertTrue(f.getSearchResults().isEmpty());
164 assertEquals(f.getIdMatch().size(), 1);
165 assertSame(f.getIdMatch().get(0), al.getSequenceAt(1));
167 // colIndex is now 0 - for use in next find next
168 // searching A--BCDefHI
169 assertEquals(f.getColumnIndex(), 0);
// a fresh Finder is built at (sequence 1, column 0) instead of reusing f
170 f = new Finder(al, null, 1, 0);
171 f.find("e"); // matches in sequence
172 assertTrue(f.getIdMatch().isEmpty());
173 assertEquals(f.getSearchResults().getSize(), 1);
174 List<SearchResultMatchI> matches = f.getSearchResults().getResults();
// single-residue match at position 5 of the second sequence
175 assertEquals(matches.get(0).getStart(), 5);
176 assertEquals(matches.get(0).getEnd(), 5);
177 assertSame(matches.get(0).getSequence(), al.getSequenceAt(1));
178 // still in the second sequence
179 assertEquals(f.getSequenceIndex(), 1);
180 // next column position to search from is 7
181 assertEquals(f.getColumnIndex(), 7);
183 // find next from end of sequence - finds next sequence id
184 f = new Finder(al, null, 1, 7);
// NOTE(review): the find(...) call for this step is not visible in this extract.
186 assertEquals(f.getIdMatch().size(), 1);
187 assertSame(f.getIdMatch().get(0), al.getSequenceAt(2));
188 assertTrue(f.getSearchResults().isEmpty());
192 * Test for matching within sequence descriptions
// Checks matching against sequence descriptions (setIncludeDescription(true)):
// a single id match, then two, then one with case sensitivity enabled, and
// finally a combined case where ids, description and residues all match.
// NOTE(review): the find(...) calls, and any find-all setup, are not visible in
// this extract, so the search strings cannot be stated here.
194 @Test(groups = "Functional")
195 public void testFind_inDescription()
// Work on an Alignment built from al and give its first two sequences
// descriptions that differ only in case.
// NOTE(review): whether new Alignment(al) shares Sequence objects with al is not
// visible here; if it does, the description and name changes in this test
// would also affect the shared fixture - confirm.
197 AlignmentI al2 = new Alignment(al);
198 al2.getSequenceAt(0).setDescription("BRAF");
199 al2.getSequenceAt(1).setDescription("braf");
202 * find first match only
204 Finder f = new Finder(al2, null);
205 f.setIncludeDescription(true);
// expected: one id match (first sequence) and no residue matches
207 assertEquals(f.getIdMatch().size(), 1);
208 assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
209 assertTrue(f.getSearchResults().isEmpty());
214 f = new Finder(al2, null);
216 f.setIncludeDescription(true);
// expected: both described sequences are reported as id matches
218 assertEquals(f.getIdMatch().size(), 2);
219 assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
220 assertSame(f.getIdMatch().get(1), al2.getSequenceAt(1));
221 assertTrue(f.getSearchResults().isEmpty());
226 f = new Finder(al2, null);
228 f.setCaseSensitive(true);
229 f.setIncludeDescription(true);
// with case sensitivity on, only the first sequence is expected to match
232 assertEquals(f.getIdMatch().size(), 1);
233 assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
234 assertTrue(f.getSearchResults().isEmpty());
237 * match sequence id, description and sequence!
// Rename the first two sequences and replace the first description so that
// "efh" (in some letter case) occurs in names and description.
239 al2.getSequenceAt(0).setDescription("the efh sequence");
240 al2.getSequenceAt(0).setName("mouseEFHkinase");
241 al2.getSequenceAt(1).setName("humanEFHkinase");
242 f = new Finder(al2, null);
244 f.setIncludeDescription(true);
247 * sequence matches should have no duplicates
// the first sequence is expected once in the id matches, not once per hit
250 assertEquals(f.getIdMatch().size(), 2);
251 assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
252 assertSame(f.getIdMatch().get(1), al2.getSequenceAt(1));
// residue matches are expected in the second and third sequences
254 assertEquals(f.getSearchResults().getSize(), 2);
255 SearchResultMatchI match = f.getSearchResults().getResults().get(0);
256 assertSame(match.getSequence(), al2.getSequenceAt(1));
257 assertEquals(match.getStart(), 5);
258 assertEquals(match.getEnd(), 7);
259 match = f.getSearchResults().getResults().get(1);
260 assertSame(match.getSequence(), al2.getSequenceAt(2));
261 assertEquals(match.getStart(), 4);
262 assertEquals(match.getEnd(), 6);
266 * Test for matching within sequence ids
// Checks matching against sequence ids: one id match with no residue matches,
// a case-sensitive search with no residue matches, and a search that matches
// both the id and the residues of an added sequence.
// NOTE(review): the find(...) calls and find-all setup are not visible in this
// extract, so the search strings cannot be stated here.
268 @Test(groups = "Functional")
269 public void testFindAll_sequenceIds()
271 Finder f = new Finder(al, null);
275 * case insensitive; seq1 occurs twice in sequence id but
276 * only one match should be returned
279 assertEquals(f.getIdMatch().size(), 1);
280 assertSame(f.getIdMatch().get(0), al.getSequenceAt(0));
281 SearchResultsI searchResults = f.getSearchResults();
282 assertTrue(searchResults.isEmpty());
287 f = new Finder(al, null);
289 f.setCaseSensitive(true);
// only the residue results are asserted for the case-sensitive search here
291 searchResults = f.getSearchResults();
292 assertTrue(searchResults.isEmpty());
295 * match both sequence id and sequence
// Add a fifth sequence (index 4) named "aBz" to an Alignment built from al.
297 AlignmentI al2 = new Alignment(al);
298 al2.addSequence(new Sequence("aBz", "xyzabZpqrAbZ"));
299 f = new Finder(al2, null);
// expected: one id match plus two residue matches, all on the added sequence
302 assertEquals(f.getIdMatch().size(), 1);
303 assertSame(f.getIdMatch().get(0), al2.getSequenceAt(4));
304 searchResults = f.getSearchResults();
305 assertEquals(searchResults.getSize(), 2);
306 SearchResultMatchI match = searchResults.getResults().get(0);
// positions 4-6 and 10-12 of "xyzabZpqrAbZ" are "abZ" and "AbZ"
307 assertSame(match.getSequence(), al2.getSequenceAt(4));
308 assertEquals(match.getStart(), 4);
309 assertEquals(match.getEnd(), 6);
310 match = searchResults.getResults().get(1);
311 assertSame(match.getSequence(), al2.getSequenceAt(4));
312 assertEquals(match.getStart(), 10);
313 assertEquals(match.getEnd(), 12);
317 * Test finding next match of a sequence pattern in an alignment
// Checks repeated "find next" searches: each find() call on the same Finder is
// expected to report exactly one match, moving on to the next occurrence.
319 @Test(groups = "Functional")
320 public void testFind_findNext()
323 * efh should be matched in seq2 only
325 Finder f = new Finder(al, null);
// NOTE(review): the find(...) call for this step is not visible in this extract.
327 SearchResultsI searchResults = f.getSearchResults();
328 assertEquals(searchResults.getSize(), 1);
329 SearchResultMatchI match = searchResults.getResults().get(0);
330 assertSame(match.getSequence(), al.getSequenceAt(1));
331 assertEquals(match.getStart(), 5);
332 assertEquals(match.getEnd(), 7);
335 * I should be found in seq1 (twice) and seq2 (once)
// The same Finder instance is reused for the three searches below, so each
// call is expected to continue from where the previous one stopped.
337 f = new Finder(al, null);
338 f.find("I"); // find next: seq1/16
339 searchResults = f.getSearchResults();
340 assertEquals(searchResults.getSize(), 1);
341 match = searchResults.getResults().get(0);
342 assertSame(match.getSequence(), al.getSequenceAt(0));
343 assertEquals(match.getStart(), 16);
344 assertEquals(match.getEnd(), 16);
346 f.find("I"); // find next: seq1/18
// the results hold only the newest match (size stays 1), not an accumulation
347 searchResults = f.getSearchResults();
348 assertEquals(searchResults.getSize(), 1);
349 match = searchResults.getResults().get(0);
350 assertSame(match.getSequence(), al.getSequenceAt(0));
351 assertEquals(match.getStart(), 18);
352 assertEquals(match.getEnd(), 18);
354 f.find("I"); // find next: seq2/8
355 searchResults = f.getSearchResults();
356 assertEquals(searchResults.getSize(), 1);
357 match = searchResults.getResults().get(0);
358 assertSame(match.getSequence(), al.getSequenceAt(1));
359 assertEquals(match.getStart(), 8);
360 assertEquals(match.getEnd(), 8);
// NOTE(review): presumably one more find("I") precedes this assertion (no
// further occurrences left); the call is not visible in this extract.
363 assertTrue(f.getSearchResults().isEmpty());
367 * Test for JAL-2302 to verify that sub-matches are not included in a find-all result
// Expects exactly one match, spanning positions 4-8 of the fourth sequence,
// rather than additional shorter matches inside that span.
// NOTE(review): the find-all setup and the find(...) call are not visible in
// this extract; positions 4-8 of seq4 are its run of five 'M' residues.
370 @Test(groups = "Functional")
371 public void testFind_maximalResultOnly()
373 Finder f = new Finder(al, null);
376 SearchResultsI searchResults = f.getSearchResults();
377 assertEquals(searchResults.getSize(), 1);
378 SearchResultMatchI match = searchResults.getResults().get(0);
379 assertSame(match.getSequence(), al.getSequenceAt(3));
380 assertEquals(match.getStart(), 4); // dataset sequence positions
381 assertEquals(match.getEnd(), 8); // base 1
385 * Test finding all matches of a sequence pattern in an alignment
// Checks "find all" searches on the shared alignment: first a search expected
// to hit the second and third sequences, then a search for 'I' expected to
// return three single-residue matches.
// NOTE(review): the find-all setup and both find(...) calls are not visible in
// this extract.
387 @Test(groups = "Functional")
388 public void testFind_findAll()
390 Finder f = new Finder(al, null);
393 SearchResultsI searchResults = f.getSearchResults();
394 assertEquals(searchResults.getSize(), 2);
395 SearchResultMatchI match = searchResults.getResults().get(0);
// first hit: second sequence, positions 5-7
396 assertSame(match.getSequence(), al.getSequenceAt(1));
397 assertEquals(match.getStart(), 5);
398 assertEquals(match.getEnd(), 7);
399 match = searchResults.getResults().get(1);
// second hit: third sequence, positions 4-6
400 assertSame(match.getSequence(), al.getSequenceAt(2));
401 assertEquals(match.getStart(), 4);
402 assertEquals(match.getEnd(), 6);
405 * find all I should find 2 positions in seq1, 1 in seq2
// The same Finder f is reused here; the results below contain only the three
// 'I' matches, so the earlier results are expected to have been replaced.
408 searchResults = f.getSearchResults();
409 assertEquals(searchResults.getSize(), 3);
410 match = searchResults.getResults().get(0);
411 assertSame(match.getSequence(), al.getSequenceAt(0));
412 assertEquals(match.getStart(), 16);
413 assertEquals(match.getEnd(), 16);
414 match = searchResults.getResults().get(1);
415 assertSame(match.getSequence(), al.getSequenceAt(0));
416 assertEquals(match.getStart(), 18);
417 assertEquals(match.getEnd(), 18);
418 match = searchResults.getResults().get(2);
419 assertSame(match.getSequence(), al.getSequenceAt(1));
420 assertEquals(match.getStart(), 8);
421 assertEquals(match.getEnd(), 8);
425 * Test finding all matches, case-sensitive
// Checks case-sensitive searching (setCaseSensitive(true)): upper-case "BC" is
// expected only in the first two sequences and lower-case "bc" only in the
// third.
// NOTE(review): the find-all setup and the find(...) calls are not visible in
// this extract.
427 @Test(groups = "Functional")
428 public void testFind_findAllCaseSensitive()
430 Finder f = new Finder(al, null);
431 f.setCaseSensitive(true);
435 * BC should match seq1/9-10 and seq2/2-3
438 SearchResultsI searchResults = f.getSearchResults();
439 assertEquals(searchResults.getSize(), 2);
440 SearchResultMatchI match = searchResults.getResults().get(0);
441 assertSame(match.getSequence(), al.getSequenceAt(0));
442 assertEquals(match.getStart(), 9);
443 assertEquals(match.getEnd(), 10);
444 match = searchResults.getResults().get(1);
445 assertSame(match.getSequence(), al.getSequenceAt(1));
446 assertEquals(match.getStart(), 2);
447 assertEquals(match.getEnd(), 3);
450 * bc should match seq3/1-2
// a fresh Finder is used, so case sensitivity has to be enabled again
452 f = new Finder(al, null);
453 f.setCaseSensitive(true);
456 searchResults = f.getSearchResults();
457 assertEquals(searchResults.getSize(), 1);
458 match = searchResults.getResults().get(0);
459 assertSame(match.getSequence(), al.getSequenceAt(2));
460 assertEquals(match.getStart(), 1);
461 assertEquals(match.getEnd(), 2);
// NOTE(review): the search that is expected to return nothing here is not
// visible in this extract.
464 assertTrue(f.getSearchResults().isEmpty());
468 * Test finding next match of a sequence pattern in a selection group
// Checks "find next" restricted to a SequenceGroup holding the second and third
// sequences: matches are expected only inside the selection, and no sequence
// ids are expected to match.
// NOTE(review): the statements that set the group's column range and the
// find(...) calls are not visible in this extract.
470 @Test(groups = "Functional")
471 public void testFind_inSelection()
474 * select sequences 2 and 3, columns 4-6 which contains
478 SequenceGroup sg = new SequenceGroup();
// second argument false on both calls
// NOTE(review): presumably it suppresses a per-add recalculation - confirm
// against SequenceGroup.addSequence.
481 sg.addSequence(al.getSequenceAt(1), false);
482 sg.addSequence(al.getSequenceAt(2), false);
// the group is passed as the Finder's second constructor argument
484 Finder f = new Finder(al, sg);
486 assertTrue(f.getIdMatch().isEmpty());
487 SearchResultsI searchResults = f.getSearchResults();
488 assertEquals(searchResults.getSize(), 1);
489 SearchResultMatchI match = searchResults.getResults().get(0);
490 assertSame(match.getSequence(), al.getSequenceAt(1));
491 assertEquals(match.getStart(), 2);
492 assertEquals(match.getEnd(), 2);
495 * a second Find should not return the 'b' in seq3 as outside the selection
498 assertTrue(f.getSearchResults().isEmpty());
499 assertTrue(f.getIdMatch().isEmpty());
// new Finder on the same selection for a different search
501 f = new Finder(al, sg);
503 assertTrue(f.getIdMatch().isEmpty());
504 searchResults = f.getSearchResults();
505 assertEquals(searchResults.getSize(), 1);
506 match = searchResults.getResults().get(0);
507 assertSame(match.getSequence(), al.getSequenceAt(1));
508 assertEquals(match.getStart(), 4);
509 assertEquals(match.getEnd(), 4);
// next expected hit moves on to the third sequence, position 3
511 assertTrue(f.getIdMatch().isEmpty());
512 searchResults = f.getSearchResults();
513 assertEquals(searchResults.getSize(), 1);
514 match = searchResults.getResults().get(0);
515 assertSame(match.getSequence(), al.getSequenceAt(2));
516 assertEquals(match.getStart(), 3);
517 assertEquals(match.getEnd(), 3);
521 * Test finding all matches of a search pattern in a selection group
// Checks "find all" restricted to a SequenceGroup holding the second and third
// sequences: id matches are expected for both group members, and residue
// matches only where the selection contains them.
// NOTE(review): the statements that set the group's column range, the find-all
// setup and the find(...) calls are not visible in this extract.
523 @Test(groups = "Functional")
524 public void testFind_findAllInSelection()
527 * select sequences 2 and 3, columns 4-6 which contains
531 SequenceGroup sg = new SequenceGroup();
534 sg.addSequence(al.getSequenceAt(1), false);
535 sg.addSequence(al.getSequenceAt(2), false);
538 * search for 'e' should match two sequence ids and one residue
540 Finder f = new Finder(al, sg);
// both group members are expected as id matches, in alignment order
543 assertEquals(f.getIdMatch().size(), 2);
544 assertSame(f.getIdMatch().get(0), al.getSequenceAt(1));
545 assertSame(f.getIdMatch().get(1), al.getSequenceAt(2));
546 SearchResultsI searchResults = f.getSearchResults();
547 assertEquals(searchResults.getSize(), 1);
548 SearchResultMatchI match = searchResults.getResults().get(0);
// the single residue match is expected in the third sequence at position 4
549 assertSame(match.getSequence(), al.getSequenceAt(2));
550 assertEquals(match.getStart(), 4);
551 assertEquals(match.getEnd(), 4);
554 * search for 'Q' should match two sequence ids only
556 f = new Finder(al, sg);
559 assertEquals(f.getIdMatch().size(), 2);
560 assertSame(f.getIdMatch().get(0), al.getSequenceAt(1));
561 assertSame(f.getIdMatch().get(1), al.getSequenceAt(2));
562 assertTrue(f.getSearchResults().isEmpty());
566 * Test finding in selection with a sequence too short to reach it
// Checks "find all" within a SequenceGroup that contains all four sequences,
// where (per the test name) at least one sequence is too short to reach the
// selected columns; only two residue matches and no id matches are expected.
// NOTE(review): the statements that set the group's column range, the find-all
// setup and the find(...) call are not visible in this extract.
568 @Test(groups = "Functional")
569 public void testFind_findAllInSelectionWithShortSequence()
572 * select all sequences, columns 10-12
576 SequenceGroup sg = new SequenceGroup();
579 sg.addSequence(al.getSequenceAt(0), false);
580 sg.addSequence(al.getSequenceAt(1), false);
581 sg.addSequence(al.getSequenceAt(2), false);
582 sg.addSequence(al.getSequenceAt(3), false);
585 * search for 'I' should match two sequence positions
587 Finder f = new Finder(al, sg);
590 assertTrue(f.getIdMatch().isEmpty());
591 SearchResultsI searchResults = f.getSearchResults();
592 assertEquals(searchResults.getSize(), 2);
593 SearchResultMatchI match = searchResults.getResults().get(0);
// expected hits: first sequence at position 16, second sequence at position 8
594 assertSame(match.getSequence(), al.getSequenceAt(0));
595 assertEquals(match.getStart(), 16);
596 assertEquals(match.getEnd(), 16);
597 match = searchResults.getResults().get(1);
598 assertSame(match.getSequence(), al.getSequenceAt(1));
599 assertEquals(match.getStart(), 8);
600 assertEquals(match.getEnd(), 8);
604 * Test that find does not report hidden positions
// Checks that searches skip residues in hidden columns: hidden ranges are set
// on the alignment via a HiddenColumns object and the expected matches lie
// only in visible columns.
// NOTE(review): the find-all setup and the find(...) calls are not visible in
// this extract.
// NOTE(review): the hidden columns are installed on the shared fixture al and no
// reset is visible at the end of this method; columns 8-13 would stay hidden
// for any test that runs afterwards - confirm whether that is intended.
606 @Test(groups = "Functional")
607 public void testFind_withHiddenColumns()
618 * hide 2-4 (CD- -BC bcd ---)
// The residues listed above are those at zero-based columns 2..4 of the four
// fixture sequences, so hideColumns is used with zero-based inclusive bounds.
620 HiddenColumns hc = new HiddenColumns();
621 hc.hideColumns(2, 4);
622 al.setHiddenColumns(hc);
625 * find all search for D should ignore hidden positions in seq1 and seq3,
626 * find the visible D in seq2
628 Finder f = new Finder(al, null);
631 SearchResultsI searchResults = f.getSearchResults();
632 assertEquals(searchResults.getSize(), 1);
633 SearchResultMatchI match = searchResults.getResults().get(0);
634 assertSame(match.getSequence(), al.getSequenceAt(1));
635 assertEquals(match.getStart(), 4);
636 assertEquals(match.getEnd(), 4);
640 * find all 'aaa' should find end of seq4 only
// hc is the object already set on al; a further range (2..5) is hidden on it
642 hc.hideColumns(2, 5);
643 f = new Finder(al, null);
645 searchResults = f.getSearchResults();
646 assertEquals(searchResults.getSize(), 1);
647 match = searchResults.getResults().get(0);
648 assertSame(match.getSequence(), al.getSequenceAt(3));
649 assertEquals(match.getStart(), 9);
650 assertEquals(match.getEnd(), 11);
653 * find all 'BE' should not match across hidden columns in seq1
656 assertTrue(f.getSearchResults().isEmpty());
659 * boundary case: hide columns at end of alignment
660 * search for H should match seq3/6 only
// reveal everything first, then hide only the trailing columns 8..13
662 hc.revealAllHiddenColumns(new ColumnSelection());
663 hc.hideColumns(8, 13);
664 f = new Finder(al, null);
666 searchResults = f.getSearchResults();
667 assertEquals(searchResults.getSize(), 1);
668 match = searchResults.getResults().get(0);
669 assertSame(match.getSequence(), al.getSequenceAt(2));
670 assertEquals(match.getStart(), 6);
671 assertEquals(match.getEnd(), 6);