From 319d88f9dc025099158ac9193f5af7d827ea474b Mon Sep 17 00:00:00 2001 From: gmungoc Date: Fri, 29 Nov 2019 15:53:26 +0000 Subject: [PATCH] JAL-3490 Finder match across entirely gapped hidden regions --- src/jalview/analysis/Finder.java | 57 ++++++++++++++++++++----------- test/jalview/analysis/FinderTest.java | 60 +++++++++++++++++++++++++++++---- 2 files changed, 91 insertions(+), 26 deletions(-) diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index 3cbef6d..ab71894 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -162,35 +162,54 @@ public class Finder implements FinderI } /** - * Answers the start-end column range of the visible region of - * sequence starting at or after the given column. - * If there are no hidden columns, this just returns the remaining width of - * the sequence. The range is restricted to the current selection - * if there is one. Answers null if there are no visible columns at or after - * column. + * Answers the start-end column range of the contiguous visible regions of + * {@code sequence} starting at or after the given {@code column}. If there are + * no hidden columns, this just returns the remaining width of the sequence. + * Otherwise, visible columns are added as long as they are contiguous on the + * sequence (hidden regions only contain gaps). The range is restricted to the + * current {@code selection} if there is one. Answers null if there are no + * visible columns at or after {@code column}. + * + * @param sequence + * @param column + * @return */ protected Range getNextVisibleSequenceRegion(SequenceI sequence, - int column) + final int column) { - int seqColStart = column; - int seqColEnd = sequence.getLength() - 1; - - /* - * restrict search to (next) visible column region, - * in case there are hidden columns - */ AlignmentI alignment = viewport.getAlignment(); VisibleContigsIterator visibleRegions = alignment.getHiddenColumns() .getVisContigsIterator(column, alignment.getWidth(), false); - int[] visible = visibleRegions.hasNext() ? visibleRegions.next() : null; - if (visible == null) + if (!visibleRegions.hasNext()) { - columnIndex = seqColEnd + 1; + // off the end of the sequence - force search to next sequence + columnIndex = sequence.getLength(); return null; } - seqColStart = Math.max(seqColStart, visible[0]); - seqColEnd = Math.min(seqColEnd, visible[1]); + + int[] visible = visibleRegions.next(); + int seqColStart = Math.max(column, visible[0]); + int seqColEnd = visible[1]; + // end residue of region (next residue if end position is gapped) + int endSeqPos = sequence.findPosition(visible[1]); + if (Comparison.isGap(sequence.getCharAt(visible[1]))) + { + endSeqPos--; + } + while (visibleRegions.hasNext()) + { + visible = visibleRegions.next(); + int startSeqPos = sequence.findPosition(visible[0]); + if (startSeqPos - endSeqPos > 1) + { + // this visible region is not contiguous - ignore it + break; + } + endSeqPos = sequence.findPosition(visible[1]); + seqColEnd = visible[1]; + } + seqColEnd = Math.min(sequence.getLength() - 1, seqColEnd); /* * restrict search to selected region if there is one diff --git a/test/jalview/analysis/FinderTest.java b/test/jalview/analysis/FinderTest.java index 5f64b28..f3ae69f 100644 --- a/test/jalview/analysis/FinderTest.java +++ b/test/jalview/analysis/FinderTest.java @@ -71,7 +71,7 @@ public class FinderTest Cache.applicationProperties.setProperty("PAD_GAPS", Boolean.FALSE.toString()); - String seqData = "seq1seq1/8-18 ABCD--EF-GHIJI\n" + "seq2 A--BCDefHI\n" + String seqData = "seq1/8-18 ABCD--EF-GHIJI\n" + "seq2 A--BCDefHI\n" + "seq3 --bcdEFH\n" + "seq4 aa---aMMMMMaaa\n"; af = new FileLoader().LoadFileWaitTillLoaded(seqData, DataSourceType.PASTE); @@ -615,7 +615,8 @@ public class FinderTest } /** - * Test that find does not report hidden positions + * Test that find does not report hidden positions, but does report matches that + * span hidden gaps */ @Test(groups = "Functional") public void testFind_withHiddenColumns() @@ -629,28 +630,73 @@ public class FinderTest */ /* - * hide 2-4 (CD- -BC bcd ---) + * hide column 3 only, search for aaa + * should find two matches: aa-[-]-aa and trailing aaa */ HiddenColumns hc = new HiddenColumns(); - hc.hideColumns(2, 4); + hc.hideColumns(3, 3); al.setHiddenColumns(hc); + Finder f = new Finder(av); + f.findAll("aaa", false, false); + SearchResultsI searchResults = f.getSearchResults(); + assertEquals(searchResults.getSize(), 2); + SearchResultMatchI match = searchResults.getResults().get(0); + assertSame(match.getSequence(), al.getSequenceAt(3)); + assertEquals(match.getStart(), 1); + assertEquals(match.getEnd(), 3); + match = searchResults.getResults().get(1); + assertSame(match.getSequence(), al.getSequenceAt(3)); + assertEquals(match.getStart(), 9); + assertEquals(match.getEnd(), 11); + + /* + * hide 2-4 (CD- -BC bcd ---) + */ + hc.hideColumns(2, 4); /* * find all search for D should ignore hidden positions in seq1 and seq3, * find the visible D in seq2 */ - Finder f = new Finder(av); + f = new Finder(av); f.findAll("D", false, false); - SearchResultsI searchResults = f.getSearchResults(); + searchResults = f.getSearchResults(); assertEquals(searchResults.getSize(), 1); - SearchResultMatchI match = searchResults.getResults().get(0); + match = searchResults.getResults().get(0); assertSame(match.getSequence(), al.getSequenceAt(1)); assertEquals(match.getStart(), 4); assertEquals(match.getEnd(), 4); /* + * search for AD should fail although these are now + * consecutive in the visible columns + */ + f = new Finder(av); + f.findAll("AD", false, false); + searchResults = f.getSearchResults(); + assertTrue(searchResults.isEmpty()); + + /* + * find all 'aaa' should find both start and end of seq4 + * (first run includes hidden gaps) + */ + f = new Finder(av); + f.findAll("aaa", false, false); + searchResults = f.getSearchResults(); + assertEquals(searchResults.getSize(), 2); + match = searchResults.getResults().get(0); + assertSame(match.getSequence(), al.getSequenceAt(3)); + assertEquals(match.getStart(), 1); + assertEquals(match.getEnd(), 3); + match = searchResults.getResults().get(1); + assertSame(match.getSequence(), al.getSequenceAt(3)); + assertEquals(match.getStart(), 9); + assertEquals(match.getEnd(), 11); + + /* * hide columns 2-5: * find all 'aaa' should find end of seq4 only + * (hidden columns not all gapped) */ hc.hideColumns(2, 5); f = new Finder(av); -- 1.7.10.2