/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License 
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *  
 * Jalview is distributed in the hope that it will be useful, but 
 * WITHOUT ANY WARRANTY; without even the implied warranty 
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
 * PURPOSE.  See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.analysis;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertSame;
import static org.testng.Assert.assertTrue;

import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResultsI;
import jalview.datamodel.Sequence;
import jalview.gui.AlignFrame;
import jalview.io.FileLoader;
import jalview.io.FormatAdapter;

import java.util.List;

import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

/**
 * Functional tests for {@link Finder}, run against a small four-sequence
 * alignment that is loaded once for the whole class.
 * <p>
 * All assertions use the TestNG argument order, i.e. (actual, expected), so
 * that failure messages report the values the right way round.
 */
public class FinderTest
{
  private AlignFrame af;

  private AlignmentI al;

  /**
   * Loads the shared test alignment from pasted text. Ungapped, the
   * sequences are
   * 
   * <pre>
   * seq1 ABCDEFGHI    (9 residues)
   * seq2 ABCDefHI     (8 residues)
   * seq3 bcdEFH       (6 residues)
   * seq4 aaaMMMMMaaa  (11 residues)
   * </pre>
   */
  @BeforeClass(groups = "Functional")
  public void setUp()
  {
    String seqData = "seq1 ABCD--EF-GHI\n" + "seq2 A--BCDefHI\n"
            + "seq3 --bcdEFH\n" + "seq4 aa---aMMMMMaaa\n";
    af = new FileLoader().LoadFileWaitTillLoaded(seqData,
            FormatAdapter.PASTE);
    al = af.getViewport().getAlignment();
  }

  /**
   * Test for find all matches of a regular expression
   */
  @Test(groups = "Functional")
  public void testFindAll_regex()
  {
    Finder f = new Finder(al, null);
    f.setFindAll(true);
    f.find("E.H"); // 'E, any character, H'

    // should match seq2 efH and seq3 EFH
    SearchResultsI sr = f.getSearchResults();
    assertEquals(sr.getSize(), 2);
    List<SearchResultMatchI> matches = sr.getResults();
    assertSame(matches.get(0).getSequence(), al.getSequenceAt(1));
    assertSame(matches.get(1).getSequence(), al.getSequenceAt(2));
    assertEquals(matches.get(0).getStart(), 5);
    assertEquals(matches.get(0).getEnd(), 7);
    assertEquals(matches.get(1).getStart(), 4);
    assertEquals(matches.get(1).getEnd(), 6);
  }

  /**
   * Test for (undocumented) find residue by position
   */
  @Test(groups = "Functional")
  public void testFind_residueNumber()
  {
    Finder f = new Finder(al, null);
    f.setFindAll(true);
    f.find("9");

    // seq1 (9 residues) and seq4 (11 residues) have a residue at position 9;
    // seq2 (8 residues) and seq3 (6 residues) are too short to match
    SearchResultsI sr = f.getSearchResults();
    assertEquals(sr.getSize(), 2);
    List<SearchResultMatchI> matches = sr.getResults();
    assertSame(matches.get(0).getSequence(), al.getSequenceAt(0));
    assertSame(matches.get(1).getSequence(), al.getSequenceAt(3));
    assertEquals(matches.get(0).getStart(), 9);
    assertEquals(matches.get(0).getEnd(), 9);
    assertEquals(matches.get(1).getStart(), 9);
    assertEquals(matches.get(1).getEnd(), 9);
  }

  /**
   * Test for find next action
   */
  @Test(groups = "Functional")
  public void testFindNext()
  {
    /*
     * start at second sequence; resIndex of -1
     * means sequence id / description is searched
     */
    Finder f = new Finder(al, null, 1, -1);
    f.find("e"); // matches id

    assertTrue(f.getSearchResults().isEmpty());
    assertEquals(f.getIdMatch().size(), 1);
    assertSame(f.getIdMatch().get(0), al.getSequenceAt(1));

    // resIndex is now 0 - for use in next find next
    assertEquals(f.getResIndex(), 0);
    f = new Finder(al, null, 1, 0);
    f.find("e"); // matches in sequence
    assertTrue(f.getIdMatch().isEmpty());
    assertEquals(f.getSearchResults().getSize(), 1);
    List<SearchResultMatchI> matches = f.getSearchResults().getResults();
    assertEquals(matches.get(0).getStart(), 5);
    assertEquals(matches.get(0).getEnd(), 5);
    assertSame(matches.get(0).getSequence(), al.getSequenceAt(1));
    // still in the second sequence
    assertEquals(f.getSeqIndex(), 1);
    // next residue position to search from is 5
    // (used as base 0 by RegEx so the same as 6 if base 1)
    assertEquals(f.getResIndex(), 5);

    // find next from end of sequence - finds next sequence id
    f = new Finder(al, null, 1, 5);
    f.find("e");
    assertEquals(f.getIdMatch().size(), 1);
    assertSame(f.getIdMatch().get(0), al.getSequenceAt(2));
  }

  /**
   * Test for matching within sequence descriptions
   */
  @Test(groups = "Functional")
  public void testFindAll_inDescription()
  {
    AlignmentI al2 = new Alignment(al);
    al2.getSequenceAt(0).setDescription("BRAF");
    al2.getSequenceAt(1).setDescription("braf");

    /*
     * case insensitive
     */
    Finder f = new Finder(al2, null);
    f.setFindAll(true);
    f.setIncludeDescription(true);

    f.find("rAF");
    assertEquals(f.getIdMatch().size(), 2);
    assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
    assertSame(f.getIdMatch().get(1), al2.getSequenceAt(1));
    assertTrue(f.getSearchResults().isEmpty());

    /*
     * case sensitive
     */
    f = new Finder(al2, null);
    f.setFindAll(true);
    f.setCaseSensitive(true);
    f.setIncludeDescription(true);

    f.find("RAF");
    assertEquals(f.getIdMatch().size(), 1);
    assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
    assertTrue(f.getSearchResults().isEmpty());

    /*
     * match sequence id, description and sequence!
     */
    al2.getSequenceAt(0).setDescription("the efh sequence");
    al2.getSequenceAt(0).setName("mouseEFHkinase");
    al2.getSequenceAt(1).setName("humanEFHkinase");
    f = new Finder(al2, null);
    f.setFindAll(true);
    f.setIncludeDescription(true);

    /*
     * sequence matches should have no duplicates
     */
    f.find("EFH");
    assertEquals(f.getIdMatch().size(), 2);
    assertSame(f.getIdMatch().get(0), al2.getSequenceAt(0));
    assertSame(f.getIdMatch().get(1), al2.getSequenceAt(1));

    assertEquals(f.getSearchResults().getSize(), 2);
    SearchResultMatchI match = f.getSearchResults().getResults().get(0);
    assertSame(match.getSequence(), al2.getSequenceAt(1));
    assertEquals(match.getStart(), 5);
    assertEquals(match.getEnd(), 7);
    match = f.getSearchResults().getResults().get(1);
    assertSame(match.getSequence(), al2.getSequenceAt(2));
    assertEquals(match.getStart(), 4);
    assertEquals(match.getEnd(), 6);
  }

  /**
   * Test for matching within sequence ids
   */
  @Test(groups = "Functional")
  public void testFindAll_sequenceIds()
  {
    Finder f = new Finder(al, null);
    f.setFindAll(true);

    /*
     * case insensitive
     */
    f.find("SEQ1");
    assertEquals(f.getIdMatch().size(), 1);
    assertSame(f.getIdMatch().get(0), al.getSequenceAt(0));
    assertTrue(f.getSearchResults().isEmpty());

    /*
     * case sensitive
     */
    f = new Finder(al, null);
    f.setFindAll(true);
    f.setCaseSensitive(true);
    f.find("SEQ1");
    assertTrue(f.getSearchResults().isEmpty());

    /*
     * match both sequence id and sequence
     */
    AlignmentI al2 = new Alignment(al);
    al2.addSequence(new Sequence("aBz", "xyzabZpqrAbZ"));
    f = new Finder(al2, null);
    f.setFindAll(true);
    f.find("ABZ");
    assertEquals(f.getIdMatch().size(), 1);
    assertSame(f.getIdMatch().get(0), al2.getSequenceAt(4));
    assertEquals(f.getSearchResults().getSize(), 2);
    SearchResultMatchI match = f.getSearchResults().getResults().get(0);
    assertSame(match.getSequence(), al2.getSequenceAt(4));
    assertEquals(match.getStart(), 4);
    assertEquals(match.getEnd(), 6);
    match = f.getSearchResults().getResults().get(1);
    assertSame(match.getSequence(), al2.getSequenceAt(4));
    assertEquals(match.getStart(), 10);
    assertEquals(match.getEnd(), 12);
  }

  /**
   * Test finding all matches of a sequence pattern in an alignment
   */
  @Test(groups = "Functional")
  public void testFindAll_simpleMatch()
  {
    Finder f = new Finder(al, null);
    f.setFindAll(true);

    /*
     * case insensitive first
     */
    f.find("EfH");
    SearchResultsI searchResults = f.getSearchResults();
    assertEquals(searchResults.getSize(), 2);
    SearchResultMatchI match = searchResults.getResults().get(0);
    assertSame(match.getSequence(), al.getSequenceAt(1));
    assertEquals(match.getStart(), 5);
    assertEquals(match.getEnd(), 7);
    match = searchResults.getResults().get(1);
    assertSame(match.getSequence(), al.getSequenceAt(2));
    assertEquals(match.getStart(), 4);
    assertEquals(match.getEnd(), 6);

    /*
     * case sensitive
     */
    f = new Finder(al, null);
    f.setFindAll(true);
    f.setCaseSensitive(true);
    f.find("BC");
    searchResults = f.getSearchResults();
    assertEquals(searchResults.getSize(), 2);
    match = searchResults.getResults().get(0);
    assertSame(match.getSequence(), al.getSequenceAt(0));
    assertEquals(match.getStart(), 2);
    assertEquals(match.getEnd(), 3);
    match = searchResults.getResults().get(1);
    assertSame(match.getSequence(), al.getSequenceAt(1));
    assertEquals(match.getStart(), 2);
    assertEquals(match.getEnd(), 3);
  }

  /**
   * Test for JAL-2302 to verify that sub-matches are not included in a find all
   * result
   */
  @Test(groups = "Functional")
  public void testFind_maximalResultOnly()
  {
    Finder f = new Finder(al, null);
    f.setFindAll(true);
    f.find("M+");
    SearchResultsI searchResults = f.getSearchResults();
    assertEquals(searchResults.getSize(), 1);
    SearchResultMatchI match = searchResults.getResults().get(0);
    assertSame(match.getSequence(), al.getSequenceAt(3));
    assertEquals(match.getStart(), 4); // dataset sequence positions
    assertEquals(match.getEnd(), 8); // base 1
  }
}