2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.datamodel.AlignmentI;
24 import jalview.datamodel.SearchResultMatchI;
25 import jalview.datamodel.SearchResults;
26 import jalview.datamodel.SearchResultsI;
27 import jalview.datamodel.SequenceGroup;
28 import jalview.datamodel.SequenceI;
29 import jalview.util.Comparison;
31 import java.util.Vector;
33 import com.stevesoft.pat.Regex;
36 * Implements the search algorithm for the Find dialog
41 * match residue locations
43 private SearchResultsI searchResults;
46 * sequences matched by id or description
48 private Vector<SequenceI> idMatch;
51 * the alignment to search over
53 private AlignmentI alignment;
56 * (optional) selection to restrict search to
58 private SequenceGroup selection;
61 * set true for case-sensitive search (default is false)
63 private boolean caseSensitive;
66 * set true to search sequence description (default is false)
68 private boolean includeDescription;
71 * set true to return all matches (default is next match only)
73 private boolean findAll;
76 * sequence index in alignment to search from
81 * residue position in sequence to search from, base 1
82 * (position of last match for a repeat search)
87 * Constructor to start searching an alignment, optionally restricting results
93 public Finder(AlignmentI al, SequenceGroup sel)
99 * Constructor to resume search at given sequence and residue on alignment and
100 * (optionally) restricted to a selection
107 public Finder(AlignmentI al, SequenceGroup sel, int seqindex,
111 this.selection = sel;
112 this.seqIndex = seqindex;
113 this.resIndex = resindex;
117 * Performs a find for the given search string. By default the next match is
118 * found, but if setFindAll(true) has been called, then all matches are found.
119 * Sequences matched by id or description can be retrieved by getIdMatch(),
120 * and matched residue patterns by getSearchResults().
122 * @param theSearchString
125 public void find(String theSearchString)
127 String searchString = caseSensitive ? theSearchString.toUpperCase()
129 Regex regex = new Regex(searchString);
130 regex.setIgnoreCase(!caseSensitive);
131 searchResults = new SearchResults();
132 idMatch = new Vector<>();
134 if (selection != null && selection.getSize() < 1)
136 selection = null; // ? ignore column-only selection
139 boolean finished = false;
140 int end = alignment.getHeight();
142 while (!finished && (seqIndex < end))
144 SequenceI seq = alignment.getSequenceAt(seqIndex);
146 if ((selection != null) && !selection.contains(seq))
148 // this sequence is not in the selection - skip to next sequence
157 * at start of sequence; try find by residue number, in sequence id,
158 * or (optionally) in sequence description
161 if (doNonMotifSearches(seq, searchString, regex))
167 finished = searchSequenceString(seq, regex) && !findAll;
178 * Searches the sequence, starting from <code>resIndex</code> (base 1), and
179 * adds matches to <code>searchResults</code>. The search is restricted to the
180 * <code>selection</code> region if there is one. Answers true if any match is
187 protected boolean searchSequenceString(SequenceI seq, Regex regex)
190 * Restrict search to selected region if there is one
193 int seqColEnd = seq.getLength() - 1;
194 int residueOffset = 0;
195 if (selection != null)
197 int selColEnd = selection.getEndRes();
198 int selColStart = selection.getStartRes();
199 if (selColStart > seqColEnd)
201 return false; // sequence doesn't reach selection region
203 seqColStart = selColStart;
204 seqColEnd = Math.min(seqColEnd, selColEnd);
205 residueOffset = seq.findPosition(selection.getStartRes())
208 String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1);
210 String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);
212 SearchResultMatchI lastMatch = null;
213 boolean found = false;
215 for (int r = resIndex; r < noGaps.length(); r++)
218 * searchFrom position is base 0, r is base 1,
219 * so search is from the position after the r'th residue
221 if (regex.searchFrom(noGaps, r))
223 resIndex = regex.matchedFrom();
224 resIndex += residueOffset; // add back #residues before selection region
225 int matchStartPosition = resIndex + seq.getStart();
226 int matchEndPosition = matchStartPosition + regex.charsMatched()
228 if (lastMatch == null || !lastMatch.contains(seq,
229 matchStartPosition, matchEndPosition))
231 lastMatch = searchResults.addResult(seq, matchStartPosition,
251 * Does searches other than for residue patterns. Currently this includes
253 * <li>find residue by position (if search string is a number)</li>
254 * <li>match search string to sequence id</li>
255 * <li>match search string to sequence description (optional)</li>
257 * Answers true if a match is found and we are not doing 'find all' (so this
258 * search action is complete), else false.
261 * @param searchString
265 protected boolean doNonMotifSearches(SequenceI seq, String searchString,
268 if (searchForResidueNumber(seq, searchString) && !findAll)
272 if (searchSequenceName(seq, regex) && !findAll)
276 if (searchSequenceDescription(seq, regex) && !findAll)
284 * Searches for a match with the sequence description, if that option was
285 * requested, and if found, adds the sequence to the list of match ids (but
286 * not as a duplicate). Answers true if a match was added, else false.
292 protected boolean searchSequenceDescription(SequenceI seq, Regex regex)
294 if (!includeDescription)
298 String desc = seq.getDescription();
299 if (desc != null && regex.search(desc) && !idMatch.contains(seq))
301 idMatch.addElement(seq);
308 * Searches for a match with the sequence name, and if found, adds the
309 * sequence to the list of match ids (but not as a duplicate). Answers true if
310 * a match was added, else false.
316 protected boolean searchSequenceName(SequenceI seq, Regex regex)
318 if (regex.search(seq.getName()) && !idMatch.contains(seq))
320 idMatch.addElement(seq);
327 * Tries to interpret the search string as a residue position, and if valid,
328 * adds the position to the search results
330 protected boolean searchForResidueNumber(SequenceI seq, String searchString)
334 int res = Integer.parseInt(searchString);
335 if (seq.getStart() <= res && seq.getEnd() >= res)
337 searchResults.addResult(seq, res, res);
340 } catch (NumberFormatException ex)
347 * Sets whether the search is case sensitive (default is no)
351 public void setCaseSensitive(boolean value)
353 this.caseSensitive = value;
357 * Sets whether search returns all matches. Default is to return the next
362 public void setFindAll(boolean value)
364 this.findAll = value;
368 * Returns the (possibly empty) list of matching sequences (when search
369 * includes searching sequence names)
373 public Vector<SequenceI> getIdMatch()
379 * @return the searchResults
381 public SearchResultsI getSearchResults()
383 return searchResults;
387 * @return the resIndex
389 public int getResIndex()
395 * @return the seqIndex
397 public int getSeqIndex()
403 * Sets whether search also searches in sequence description text (default is
408 public void setIncludeDescription(boolean value)
410 this.includeDescription = value;