2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.AlignViewportI;
24 import jalview.api.FinderI;
25 import jalview.datamodel.AlignmentI;
26 import jalview.datamodel.Range;
27 import jalview.datamodel.SearchResultMatchI;
28 import jalview.datamodel.SearchResults;
29 import jalview.datamodel.SearchResultsI;
30 import jalview.datamodel.SequenceGroup;
31 import jalview.datamodel.SequenceI;
32 import jalview.datamodel.VisibleContigsIterator;
33 import jalview.util.Comparison;
34 import jalview.util.Platform;
36 import java.util.List;
37 import java.util.Vector;
39 import com.stevesoft.pat.Regex;
42 * Implements the search algorithm for the Find dialog
44 public class Finder implements FinderI
47 * matched residue locations
49 private SearchResultsI searchResults;
52 * sequences matched by id or description
54 private Vector<SequenceI> idMatches;
57 * the viewport to search over
59 private AlignViewportI viewport;
62 * sequence index in alignment to search from
64 private int sequenceIndex;
67 * column position in sequence to search from, base 0
68 * - absolute column number including any hidden columns
69 * (position after start of last match for a repeat search)
71 private int columnIndex;
74 * Constructor for searching a viewport
78 public Finder(AlignViewportI av)
81 this.sequenceIndex = 0;
82 this.columnIndex = -1;
86 public void findAll(String theSearchString, boolean matchCase,
87 boolean searchDescription)
90 * search from the start
95 doFind(theSearchString, matchCase, searchDescription, true);
98 * reset to start for next search
105 public void findNext(String theSearchString, boolean matchCase,
106 boolean searchDescription)
108 doFind(theSearchString, matchCase, searchDescription, false);
110 if (searchResults.isEmpty() && idMatches.isEmpty())
113 * search failed - reset to start for next search
121 * Performs a 'find next' or 'find all'
123 * @param theSearchString
125 * @param searchDescription
128 protected void doFind(String theSearchString, boolean matchCase,
129 boolean searchDescription, boolean findAll)
131 String searchString = matchCase ? theSearchString
132 : theSearchString.toUpperCase();
133 Regex searchPattern = Platform.newRegex(searchString);
134 searchPattern.setIgnoreCase(!matchCase);
136 searchResults = new SearchResults();
137 idMatches = new Vector<>();
139 SequenceGroup selection = viewport.getSelectionGroup();
140 if (selection != null && selection.getSize() < 1)
142 selection = null; // ? ignore column-only selection
145 AlignmentI alignment = viewport.getAlignment();
146 int end = alignment.getHeight();
148 while (sequenceIndex < end)
150 SequenceI seq = alignment.getSequenceAt(sequenceIndex);
151 boolean found = findNextMatch(seq, searchString, searchPattern,
153 if (found && !findAll)
166 * Answers the start-end column range of the visible region of
167 * <code>sequence</code> starting at or after the given <code>column</code>.
168 * If there are no hidden columns, this just returns the remaining width of
169 * the sequence. The range is restricted to the current <code>selection</code>
170 * if there is one. Answers null if there are no visible columns at or after
171 * <code>column</code>.
173 protected Range getNextVisibleSequenceRegion(SequenceI sequence,
176 int seqColStart = column;
177 int seqColEnd = sequence.getLength() - 1;
180 * restrict search to (next) visible column region,
181 * in case there are hidden columns
183 AlignmentI alignment = viewport.getAlignment();
184 VisibleContigsIterator visibleRegions = alignment.getHiddenColumns()
185 .getVisContigsIterator(column, alignment.getWidth(),
187 int[] visible = visibleRegions.hasNext() ? visibleRegions.next() : null;
190 columnIndex = seqColEnd + 1;
193 seqColStart = Math.max(seqColStart, visible[0]);
194 seqColEnd = Math.min(seqColEnd, visible[1]);
197 * restrict search to selected region if there is one
199 SequenceGroup selection = viewport.getSelectionGroup();
200 if (selection != null)
202 int selectionStart = selection.getStartRes();
203 int selectionEnd = selection.getEndRes();
204 if (selectionStart > seqColEnd || selectionEnd < seqColStart)
207 * sequence region doesn't overlap selection region
209 columnIndex = seqColEnd + 1;
212 seqColStart = Math.max(seqColStart, selectionStart);
213 seqColEnd = Math.min(seqColEnd, selectionEnd);
216 return new Range(seqColStart, seqColEnd);
220 * Finds the next match in the given sequence, starting at column at
221 * <code>columnIndex</code>. Answers true if a match is found, else false. If
222 * a match is found, <code>columnIndex</code> is advanced to the column after
223 * the start of the matched region, ready for a search from the next position.
226 * @param searchString
227 * @param searchPattern
228 * @param matchDescription
231 protected boolean findNextMatch(SequenceI seq, String searchString,
232 Regex searchPattern, boolean matchDescription)
234 SequenceGroup selection = viewport.getSelectionGroup();
235 if (selection != null && !selection.contains(seq))
238 * this sequence is not in the selection - advance to next sequence
246 * at start of sequence; try find by residue number, in sequence id,
247 * or (optionally) in sequence description
249 if (doNonMotifSearches(seq, searchString, searchPattern,
257 * search for next match in sequence string
259 int end = seq.getLength();
260 while (columnIndex < end)
262 if (searchNextVisibleRegion(seq, searchPattern))
271 * Searches the sequence, starting from <code>columnIndex</code>, and adds the
272 * next match (if any) to <code>searchResults</code>. The search is restricted
273 * to the next visible column region, and to the <code>selection</code> region
274 * if there is one. Answers true if a match is added, else false.
277 * @param searchPattern
280 protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern)
282 Range visible = getNextVisibleSequenceRegion(seq, columnIndex);
287 String seqString = seq.getSequenceAsString(visible.start, visible.end + 1);
288 String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);
290 if (searchPattern.search(noGaps))
292 int sequenceStartPosition = seq.findPosition(visible.start);
293 recordMatch(seq, searchPattern, sequenceStartPosition);
299 * no match - advance columnIndex past this visible region
300 * so the next visible region (if any) is searched next
302 columnIndex = visible.end + 1;
309 * Adds the match held in the <code>searchPattern</code> Regex to the
310 * <code>searchResults</code>, unless it is a subregion of the last match
311 * recorded. <code>columnIndex</code> is advanced to the position after the
312 * start of the matched region, ready for the next search. Answers true if a
313 * match was added, else false.
316 * @param searchPattern
317 * @param firstResiduePosition
320 protected boolean recordMatch(SequenceI seq, Regex searchPattern,
321 int firstResiduePosition)
324 * get start/end of the match in sequence coordinates
326 int offset = searchPattern.matchedFrom();
327 int matchStartPosition = firstResiduePosition + offset;
328 int matchEndPosition = matchStartPosition
329 + searchPattern.charsMatched() - 1;
332 * update columnIndex to next column after the start of the match
333 * (findIndex returns a value base 1, columnIndex is held base 0)
335 columnIndex = seq.findIndex(matchStartPosition);
338 * check that this match is not a subset of the previous one (JAL-2302)
340 List<SearchResultMatchI> matches = searchResults.getResults();
341 SearchResultMatchI lastMatch = matches.isEmpty() ? null
342 : matches.get(matches.size() - 1);
344 if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
347 searchResults.addResult(seq, matchStartPosition, matchEndPosition);
355 * Does searches other than for residue patterns. Currently this includes
357 * <li>find residue by position (if search string is a number)</li>
358 * <li>match search string to sequence id</li>
359 * <li>match search string to sequence description (optional)</li>
361 * Answers true if a match is found, else false.
364 * @param searchString
365 * @param searchPattern
366 * @param includeDescription
369 protected boolean doNonMotifSearches(SequenceI seq, String searchString,
370 Regex searchPattern, boolean includeDescription)
373 * position sequence search to start of sequence
377 if (searchForResidueNumber(seq, searchString))
381 if (searchSequenceName(seq, searchPattern))
385 if (includeDescription && searchSequenceDescription(seq, searchPattern))
393 * Searches for a match with the sequence description, and if found, adds the
394 * sequence to the list of match ids (but not as a duplicate). Answers true if
395 * a match was added, else false.
398 * @param searchPattern
401 protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern)
403 String desc = seq.getDescription();
404 if (desc != null && searchPattern.search(desc) && !idMatches.contains(seq))
406 idMatches.addElement(seq);
413 * Searches for a match with the sequence name, and if found, adds the
414 * sequence to the list of match ids (but not as a duplicate). Answers true if
415 * a match was added, else false.
418 * @param searchPattern
421 protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
423 if (searchPattern.search(seq.getName()) && !idMatches.contains(seq))
425 idMatches.addElement(seq);
432 * Tries to interpret the search string as a residue position, and if valid,
433 * adds the position to the search results and returns true, else answers
436 protected boolean searchForResidueNumber(SequenceI seq, String searchString)
440 int res = Integer.parseInt(searchString);
441 if (seq.getStart() <= res && seq.getEnd() >= res)
443 searchResults.addResult(seq, res, res);
446 } catch (NumberFormatException ex)
453 * @see jalview.analysis.FinderI#getIdMatch()
456 public Vector<SequenceI> getIdMatches()
462 * @see jalview.analysis.FinderI#getSearchResults()
465 public SearchResultsI getSearchResults()
467 return searchResults;