2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.datamodel.AlignmentI;
24 import jalview.datamodel.Range;
25 import jalview.datamodel.SearchResultMatchI;
26 import jalview.datamodel.SearchResults;
27 import jalview.datamodel.SearchResultsI;
28 import jalview.datamodel.SequenceGroup;
29 import jalview.datamodel.SequenceI;
30 import jalview.datamodel.VisibleContigsIterator;
31 import jalview.util.Comparison;
33 import java.util.List;
34 import java.util.Vector;
36 import com.stevesoft.pat.Regex;
39 * Implements the search algorithm for the Find dialog
44 * matched residue locations
46 private SearchResultsI searchResults;
49 * sequences matched by id or description
51 private Vector<SequenceI> idMatch;
54 * the alignment to search over
56 private AlignmentI alignment;
59 * (optional) selection to restrict search to
61 private SequenceGroup selection;
64 * set true for case-sensitive search (default is false)
66 private boolean caseSensitive;
69 * set true to search sequence description (default is false)
71 private boolean includeDescription;
74 * set true to return all matches (default is next match only)
76 private boolean findAll;
79 * sequence index in alignment to search from
81 private int sequenceIndex;
84 * column position in sequence to search from, base 0
85 * - absolute column number including any hidden columns
86 * (position of last match for a repeat search)
88 private int columnIndex;
91 * Constructor to start searching an alignment, optionally restricting results
97 public Finder(AlignmentI al, SequenceGroup sel)
103 * Constructor to resume search at the given sequence and column of the
104 * alignment, (optionally) restricted to a selection
111 public Finder(AlignmentI al, SequenceGroup sel, int seqindex,
115 this.selection = sel;
116 this.sequenceIndex = seqindex;
117 this.columnIndex = colindex;
121 * Performs a find for the given search string. By default the next match is
122 * found, but if setFindAll(true) has been called, then all matches are found.
123 * Sequences matched by id or description can be retrieved by getIdMatch(),
124 * and matched residue patterns by getSearchResults().
126 * @param theSearchString
129 public void find(String theSearchString)
137 String searchString = caseSensitive ? theSearchString
138 : theSearchString.toUpperCase();
139 Regex searchPattern = new Regex(searchString);
140 searchPattern.setIgnoreCase(!caseSensitive);
141 searchResults = new SearchResults();
142 idMatch = new Vector<>();
144 if (selection != null && selection.getSize() < 1)
146 selection = null; // ? ignore column-only selection
149 int end = alignment.getHeight();
151 while (sequenceIndex < end)
153 SequenceI seq = alignment.getSequenceAt(sequenceIndex);
154 boolean found = findNext(seq, searchString, searchPattern);
155 if (found && !findAll)
168 * Answers the start-end column range of the visible region of
169 * <code>sequence</code> starting at or after the given <code>column</code>.
170 * If there are no hidden columns, this just returns the remaining width of
171 * the sequence. The range is restricted to the current <code>selection</code>
172 * if there is one. Answers null if there are no visible columns at or after
173 * <code>column</code>.
175 protected Range getNextVisibleSequenceRegion(SequenceI sequence,
178 int seqColStart = column;
179 int seqColEnd = sequence.getLength() - 1;
182 * restrict search to (next) visible column region,
183 * in case there are hidden columns
185 VisibleContigsIterator visibleRegions = alignment.getHiddenColumns()
186 .getVisContigsIterator(column, alignment.getWidth(),
188 int[] visible = visibleRegions.hasNext() ? visibleRegions.next() : null;
191 columnIndex = seqColEnd + 1;
194 seqColStart = Math.max(seqColStart, visible[0]);
195 seqColEnd = Math.min(seqColEnd, visible[1]);
198 * restrict search to selected region if there is one
200 if (selection != null)
202 int selectionStart = selection.getStartRes();
203 int selectionEnd = selection.getEndRes();
204 if (selectionStart > seqColEnd || selectionEnd < seqColStart)
207 * sequence region doesn't overlap selection region
209 columnIndex = seqColEnd + 1;
212 seqColStart = Math.max(seqColStart, selectionStart);
213 seqColEnd = Math.min(seqColEnd, selectionEnd);
216 return new Range(seqColStart, seqColEnd);
220 * Finds the next match in the given sequence, starting at column
221 * <code>columnIndex</code>. Answers true if a match is found, else false. If
222 * a match is found, <code>columnIndex</code> is advanced to the column after
223 * the start of the matched region, ready for a search from the next position.
226 * @param searchString
227 * @param searchPattern
230 protected boolean findNext(SequenceI seq, String searchString,
233 if (selection != null && !selection.contains(seq))
236 * this sequence is not in the selection - advance to next sequence
244 * at start of sequence; try find by residue number, in sequence id,
245 * or (optionally) in sequence description
247 if (doNonMotifSearches(seq, searchString, searchPattern))
254 * search for next match in sequence string
256 int end = seq.getLength();
257 while (columnIndex < end)
259 if (searchNextVisibleRegion(seq, searchPattern))
268 * Searches the sequence, starting from <code>columnIndex</code>, and adds the
269 * next match (if any) to <code>searchResults</code>. The search is restricted
270 * to the next visible column region, and to the <code>selection</code> region
271 * if there is one. Answers true if a match is added, else false.
274 * @param searchPattern
277 protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern)
279 Range visible = getNextVisibleSequenceRegion(seq, columnIndex);
284 String seqString = seq.getSequenceAsString(visible.start, visible.end + 1);
285 String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);
287 if (searchPattern.search(noGaps))
289 int sequenceStartPosition = seq.findPosition(visible.start);
290 recordMatch(seq, searchPattern, sequenceStartPosition);
296 * no match - advance columnIndex past this visible region
297 * so the next visible region (if any) is searched next
299 columnIndex = visible.end + 1;
306 * Adds the match held in the <code>searchPattern</code> Regex to the
307 * <code>searchResults</code>, unless it is a subregion of the last match
308 * recorded. <code>columnIndex</code> is advanced to the position after the
309 * start of the matched region, ready for the next search. Answers true if a
310 * match was added, else false.
313 * @param searchPattern
314 * @param firstResiduePosition
317 protected boolean recordMatch(SequenceI seq, Regex searchPattern,
318 int firstResiduePosition)
321 * get start/end of the match in sequence coordinates
323 int offset = searchPattern.matchedFrom();
324 int matchStartPosition = firstResiduePosition + offset;
325 int matchEndPosition = matchStartPosition
326 + searchPattern.charsMatched() - 1;
329 * update columnIndex to next column after the start of the match
330 * (findIndex returns a value base 1, columnIndex is held base 0)
332 columnIndex = seq.findIndex(matchStartPosition);
335 * check that this match is not a subset of the previous one (JAL-2302)
337 List<SearchResultMatchI> matches = searchResults.getResults();
338 SearchResultMatchI lastMatch = matches.isEmpty() ? null
339 : matches.get(matches.size() - 1);
341 if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
344 searchResults.addResult(seq, matchStartPosition, matchEndPosition);
352 * Does searches other than for residue patterns. Currently this includes
354 * <li>find residue by position (if search string is a number)</li>
355 * <li>match search string to sequence id</li>
356 * <li>match search string to sequence description (optional)</li>
358 * Answers true if a match is found, else false.
361 * @param searchString
362 * @param searchPattern
365 protected boolean doNonMotifSearches(SequenceI seq, String searchString,
369 * position sequence search to start of sequence
373 if (searchForResidueNumber(seq, searchString))
377 if (searchSequenceName(seq, searchPattern))
381 if (includeDescription && searchSequenceDescription(seq, searchPattern))
389 * Searches for a match with the sequence description, and if found, adds the
390 * sequence to the list of match ids (but not as a duplicate). Answers true if
391 * a match was added, else false.
394 * @param searchPattern
397 protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern)
399 String desc = seq.getDescription();
400 if (desc != null && searchPattern.search(desc) && !idMatch.contains(seq))
402 idMatch.addElement(seq);
409 * Searches for a match with the sequence name, and if found, adds the
410 * sequence to the list of match ids (but not as a duplicate). Answers true if
411 * a match was added, else false.
414 * @param searchPattern
417 protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
419 if (searchPattern.search(seq.getName()) && !idMatch.contains(seq))
421 idMatch.addElement(seq);
428 * Tries to interpret the search string as a residue position, and if valid,
429 * adds the position to the search results and returns true, else answers
432 protected boolean searchForResidueNumber(SequenceI seq, String searchString)
436 int res = Integer.parseInt(searchString);
437 if (seq.getStart() <= res && seq.getEnd() >= res)
439 searchResults.addResult(seq, res, res);
442 } catch (NumberFormatException ex)
449 * Sets whether the search is case sensitive (default is no)
453 public void setCaseSensitive(boolean value)
455 this.caseSensitive = value;
459 * Sets whether search returns all matches. Default is to return the next
464 public void setFindAll(boolean value)
466 this.findAll = value;
470 * Returns the (possibly empty) list of sequences matched on sequence name or
475 public Vector<SequenceI> getIdMatch()
481 * Answers the search results (possibly empty) from the last search
485 public SearchResultsI getSearchResults()
487 return searchResults;
491 * Answers the absolute column position (base 0, including any hidden columns)
492 * of the start of the last sequence motif (residue pattern) match found. A
493 * 'Find next' will search from the next position.
497 public int getColumnIndex()
503 * Answers the offset in the alignment (0..) of the sequence in which the last
504 * match was found (if any)
508 public int getSequenceIndex()
510 return sequenceIndex;
514 * Sets whether the search also covers sequence description text (default is
519 public void setIncludeDescription(boolean value)
521 this.includeDescription = value;