2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.AlignViewportI;
24 import jalview.api.FinderI;
25 import jalview.datamodel.AlignmentI;
26 import jalview.datamodel.Range;
27 import jalview.datamodel.SearchResultMatchI;
28 import jalview.datamodel.SearchResults;
29 import jalview.datamodel.SearchResultsI;
30 import jalview.datamodel.SequenceGroup;
31 import jalview.datamodel.SequenceI;
32 import jalview.datamodel.VisibleContigsIterator;
33 import jalview.util.Comparison;
35 import java.util.List;
36 import java.util.Vector;
38 import com.stevesoft.pat.Regex;
41 * Implements the search algorithm for the Find dialog
// Finder holds the state of a search for the Find dialog: the results of the
// latest search (residue matches plus id/description matches), the viewport
// being searched, and the sequence/column position the next search resumes from.
43 public class Finder implements FinderI
46 * matched residue locations
// replaced with a fresh, empty SearchResults at the start of every doFind call
48 private SearchResultsI searchResults;
51 * sequences matched by id or description
// replaced with a fresh, empty Vector at the start of every doFind call
53 private Vector<SequenceI> idMatches;
56 * the viewport to search over
// supplies the alignment and the current selection group used by the searches
58 private AlignViewportI viewport;
61 * sequence index in alignment to search from
// initialised to 0 by the constructor
63 private int sequenceIndex;
66 * column position in sequence to search from, base 0
67 * - absolute column number including any hidden columns
68 * (position after start of last match for a repeat search)
// initialised to -1 by the constructor; updated by recordMatch and by the
// visible-region helpers as the search advances
70 private int columnIndex;
73 * Constructor for searching a viewport
// Creates a finder for the given viewport, positioned at sequence index 0
// with columnIndex -1.
// NOTE(review): no assignment of the 'viewport' field from 'av' is visible
// in this view - confirm that the field is set here, since every search
// method dereferences it.
77 public Finder(AlignViewportI av)
80 this.sequenceIndex = 0;
81 this.columnIndex = -1;
// Finds every match for the search string by delegating to doFind with
// findAll = true. Results are then available from getSearchResults() and
// getIdMatches().
// NOTE(review): the two comments below describe positioning to the start
// before the search and resetting afterwards, but the statements that do so
// are not visible in this view - presumably sequenceIndex and columnIndex
// are reset; confirm.
85 public void findAll(String theSearchString, boolean matchCase,
86 boolean searchDescription)
89 * search from the start
94 doFind(theSearchString, matchCase, searchDescription, true);
97 * reset to start for next search
// Finds the next match from the current search position by delegating to
// doFind with findAll = false.
104 public void findNext(String theSearchString, boolean matchCase,
105 boolean searchDescription)
107 doFind(theSearchString, matchCase, searchDescription, false);
// both result collections empty means this search found nothing
// NOTE(review): the reset performed in that case is not visible in this view
109 if (searchResults.isEmpty() && idMatches.isEmpty())
112 * search failed - reset to start for next search
120 * Performs a 'find next' or 'find all'
122 * @param theSearchString
124 * @param searchDescription
// Runs one search over the viewport's alignment, starting from the sequence
// at sequenceIndex. Both result collections are replaced with empty ones
// first, so results from an earlier search are discarded.
//
// theSearchString   - text to search for; it is also compiled as a Regex
// matchCase         - when false the string is upper-cased and the Regex is
//                     set to ignore case
// searchDescription - presumably forwarded to findNextMatch (the final
//                     argument line of that call is not visible here)
// findAll           - tested together with 'found' after each sequence search;
//                     the action taken for a 'find next' hit is not visible
127 protected void doFind(String theSearchString, boolean matchCase,
128 boolean searchDescription, boolean findAll)
// NOTE(review): toUpperCase() uses the default locale - confirm that is intended
130 String searchString = matchCase ? theSearchString
131 : theSearchString.toUpperCase();
132 Regex searchPattern = new Regex(searchString);
133 searchPattern.setIgnoreCase(!matchCase);
// start every search with empty results
135 searchResults = new SearchResults();
136 idMatches = new Vector<>();
// NOTE(review): in the visible lines this local is not read again after the
// check below; the helper methods fetch the selection group themselves
138 SequenceGroup selection = viewport.getSelectionGroup();
139 if (selection != null && selection.getSize() < 1)
141 selection = null; // ? ignore column-only selection
144 AlignmentI alignment = viewport.getAlignment();
// upper bound for sequenceIndex in the loop below
145 int end = alignment.getHeight();
// NOTE(review): the code that advances sequenceIndex is not visible in this view
147 while (sequenceIndex < end)
149 SequenceI seq = alignment.getSequenceAt(sequenceIndex);
150 boolean found = findNextMatch(seq, searchString, searchPattern,
152 if (found && !findAll)
165 * Answers the start-end column range of the contiguous visible regions of
166 * {@code sequence} starting at or after the given {@code column}. If there are
167 * no hidden columns, this just returns the remaining width of the sequence.
168 * Otherwise, visible columns are added as long as they are contiguous on the
169 * sequence (hidden regions only contain gaps). The range is restricted to the
170 * current {@code selection} if there is one. Answers null if there are no
171 * visible columns at or after {@code column}.
// Works out the next column range of 'sequence' to search, at or after
// 'column', and returns it as a Range of start/end columns.
// Side effect: when there is nothing to search, columnIndex is moved forward
// (to the sequence length, or to just past the rejected region).
// NOTE(review): the return statements for those two cases are not visible in
// this view - presumably null is returned; confirm.
177 protected Range getNextVisibleSequenceRegion(SequenceI sequence,
180 AlignmentI alignment = viewport.getAlignment();
// iterator over visible column regions between 'column' and the alignment width
181 VisibleContigsIterator visibleRegions = alignment.getHiddenColumns()
182 .getVisContigsIterator(column, alignment.getWidth(),
184 if (!visibleRegions.hasNext())
186 // off the end of the sequence - force search to next sequence
187 columnIndex = sequence.getLength();
// first visible region: visible[0] / visible[1] are used as its start / end columns
191 int[] visible = visibleRegions.next();
// never start before the requested column
192 int seqColStart = Math.max(column, visible[0]);
193 int seqColEnd = visible[1];
194 // end residue of region (next residue if end position is gapped)
195 int endSeqPos = sequence.findPosition(visible[1]);
// NOTE(review): the adjustment made when the end column holds a gap is not
// visible in this view
196 if (Comparison.isGap(sequence.getCharAt(visible[1])))
// extend the range over following visible regions while their first residue
// position is at most one beyond the last residue position seen so far
200 while (visibleRegions.hasNext())
202 visible = visibleRegions.next();
203 int startSeqPos = sequence.findPosition(visible[0]);
204 if (startSeqPos - endSeqPos > 1)
206 // this visible region is not contiguous - ignore it
209 endSeqPos = sequence.findPosition(visible[1]);
210 seqColEnd = visible[1];
// clamp the end column to the last column of the sequence
212 seqColEnd = Math.min(sequence.getLength() - 1, seqColEnd);
215 * restrict search to selected region if there is one
217 SequenceGroup selection = viewport.getSelectionGroup();
218 if (selection != null)
220 int selectionStart = selection.getStartRes();
221 int selectionEnd = selection.getEndRes();
// selection lies wholly after or wholly before the candidate range
222 if (selectionStart > seqColEnd || selectionEnd < seqColStart)
225 * sequence region doesn't overlap selection region
// skip the search position past this region
227 columnIndex = seqColEnd + 1;
// intersect the candidate range with the selected columns
230 seqColStart = Math.max(seqColStart, selectionStart);
231 seqColEnd = Math.min(seqColEnd, selectionEnd);
234 return new Range(seqColStart, seqColEnd);
238 * Finds the next match in the given sequence, starting at column
239 * <code>columnIndex</code>. Answers true if a match is found, else false. If
240 * a match is found, <code>columnIndex</code> is advanced to the column after
241 * the start of the matched region, ready for a search from the next position.
244 * @param searchString
245 * @param searchPattern
246 * @param matchDescription
// Looks for the next match in 'seq' from the current columnIndex. A sequence
// outside a non-null selection group is skipped. Otherwise the non-motif
// searches (residue number, id, description) and then the residue pattern
// search over successive visible regions are tried.
// NOTE(review): the condition guarding the non-motif searches and all return
// statements are not visible in this view.
249 protected boolean findNextMatch(SequenceI seq, String searchString,
250 Regex searchPattern, boolean matchDescription)
252 SequenceGroup selection = viewport.getSelectionGroup();
// a selection exists and this sequence is not part of it
253 if (selection != null && !selection.contains(seq))
256 * this sequence is not in the selection - advance to next sequence
264 * at start of sequence; try find by residue number, in sequence id,
265 * or (optionally) in sequence description
267 if (doNonMotifSearches(seq, searchString, searchPattern,
275 * search for next match in sequence string
277 int end = seq.getLength();
// columnIndex is not changed in this loop directly; progress relies on
// searchNextVisibleRegion (and the helpers it calls) advancing it
278 while (columnIndex < end)
280 if (searchNextVisibleRegion(seq, searchPattern))
289 * Searches the sequence, starting from <code>columnIndex</code>, and adds the
290 * next match (if any) to <code>searchResults</code>. The search is restricted
291 * to the next visible column region, and to the <code>selection</code> region
292 * if there is one. Answers true if a match is added, else false.
295 * @param searchPattern
// Searches the next visible region of 'seq' (from columnIndex) for the
// pattern. On a hit the match is passed to recordMatch; otherwise
// columnIndex is moved to the column after the region just searched.
// NOTE(review): handling of a null Range from getNextVisibleSequenceRegion
// and the return statements are not visible in this view.
298 protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern)
300 Range visible = getNextVisibleSequenceRegion(seq, columnIndex);
// visible.end is an inclusive column, so + 1 is passed - presumably the end
// argument of getSequenceAsString is exclusive; confirm
305 String seqString = seq.getSequenceAsString(visible.start, visible.end + 1);
// the pattern is matched against the region with gap characters extracted
306 String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);
308 if (searchPattern.search(noGaps))
// residue position for the first searched column; recordMatch adds the
// match offset to it to obtain the match position
310 int sequenceStartPosition = seq.findPosition(visible.start);
311 recordMatch(seq, searchPattern, sequenceStartPosition);
317 * no match - advance columnIndex past this visible region
318 * so the next visible region (if any) is searched next
320 columnIndex = visible.end + 1;
327 * Adds the match held in the <code>searchPattern</code> Regex to the
328 * <code>searchResults</code>, unless it is a subregion of the last match
329 * recorded. <code>columnIndex</code> is advanced to the position after the
330 * start of the matched region, ready for the next search. Answers true if a
331 * match was added, else false.
334 * @param searchPattern
335 * @param firstResiduePosition
// Converts the match currently held by 'searchPattern' into sequence
// positions, moves columnIndex on for the next search, and adds the match to
// searchResults unless the most recently recorded match already contains it.
//
// firstResiduePosition - sequence position corresponding to offset 0 of the
//                        string that was searched
// NOTE(review): the return statements are not visible in this view.
338 protected boolean recordMatch(SequenceI seq, Regex searchPattern,
339 int firstResiduePosition)
342 * get start/end of the match in sequence coordinates
// offset of the match within the searched (gap-free) string
344 int offset = searchPattern.matchedFrom();
345 int matchStartPosition = firstResiduePosition + offset;
// inclusive end position, hence the - 1
346 int matchEndPosition = matchStartPosition
347 + searchPattern.charsMatched() - 1;
350 * update columnIndex to next column after the start of the match
351 * (findIndex returns a value base 1, columnIndex is held base 0)
353 columnIndex = seq.findIndex(matchStartPosition);
356 * check that this match is not a subset of the previous one (JAL-2302)
358 List<SearchResultMatchI> matches = searchResults.getResults();
// only the last recorded match is compared; null when nothing recorded yet
359 SearchResultMatchI lastMatch = matches.isEmpty() ? null
360 : matches.get(matches.size() - 1);
362 if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
365 searchResults.addResult(seq, matchStartPosition, matchEndPosition);
373 * Does searches other than for residue patterns. Currently this includes
375 * <li>find residue by position (if search string is a number)</li>
376 * <li>match search string to sequence id</li>
377 * <li>match search string to sequence description (optional)</li>
379 * Answers true if a match is found, else false.
382 * @param searchString
383 * @param searchPattern
384 * @param includeDescription
// Tries the searches that are not residue-pattern searches, in this order:
// residue number, sequence name, then sequence description (the last only
// when includeDescription is true).
// NOTE(review): the statement under "position sequence search to start of
// sequence" and the bodies of the three checks are not visible in this view
// - presumably each returns true on success; confirm.
387 protected boolean doNonMotifSearches(SequenceI seq, String searchString,
388 Regex searchPattern, boolean includeDescription)
391 * position sequence search to start of sequence
395 if (searchForResidueNumber(seq, searchString))
399 if (searchSequenceName(seq, searchPattern))
// && short-circuits: the description is not searched unless requested
403 if (includeDescription && searchSequenceDescription(seq, searchPattern))
411 * Searches for a match with the sequence description, and if found, adds the
412 * sequence to the list of match ids (but not as a duplicate). Answers true if
413 * a match was added, else false.
416 * @param searchPattern
// Adds 'seq' to idMatches when it has a description that the pattern matches
// and it is not already in the list.
// NOTE(review): the return statements are not visible in this view.
419 protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern)
421 String desc = seq.getDescription();
// a null description is skipped (the pattern is never run on null), and a
// sequence already in idMatches is not added twice
422 if (desc != null && searchPattern.search(desc) && !idMatches.contains(seq))
424 idMatches.addElement(seq);
431 * Searches for a match with the sequence name, and if found, adds the
432 * sequence to the list of match ids (but not as a duplicate). Answers true if
433 * a match was added, else false.
436 * @param searchPattern
// Adds 'seq' to idMatches when the pattern matches its name and it is not
// already in the list.
// NOTE(review): the return statements are not visible in this view; also,
// unlike the description search, the name is not null-checked before
// matching - confirm getName() cannot return null.
439 protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
441 if (searchPattern.search(seq.getName()) && !idMatches.contains(seq))
443 idMatches.addElement(seq);
450 * Tries to interpret the search string as a residue position, and if valid,
451 * adds the position to the search results and returns true, else answers
// Parses 'searchString' as an integer; when the value lies between
// seq.getStart() and seq.getEnd() inclusive, a single-position result
// (res to res) is added to searchResults for this sequence.
// NOTE(review): the return statements and the catch body are not visible in
// this view.
454 protected boolean searchForResidueNumber(SequenceI seq, String searchString)
// throws NumberFormatException when the search string is not an integer
458 int res = Integer.parseInt(searchString);
459 if (seq.getStart() <= res && seq.getEnd() >= res)
461 searchResults.addResult(seq, res, res);
// a non-numeric search string is an expected case, not an error
464 } catch (NumberFormatException ex)
471 * @see jalview.api.FinderI#getIdMatches()
// Accessor for the sequences matched by id or description.
// NOTE(review): the body is not visible in this view - presumably it returns
// the idMatches field; confirm.
474 public Vector<SequenceI> getIdMatches()
480 * @see jalview.api.FinderI#getSearchResults()
// Returns the residue matches held in the searchResults field. The field is
// only assigned inside doFind in the visible code, so it is null until a
// search has been run.
483 public SearchResultsI getSearchResults()
485 return searchResults;