2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import jalview.api.AlignViewportI;
24 import jalview.api.FinderI;
25 import jalview.datamodel.AlignmentI;
26 import jalview.datamodel.SearchResultMatchI;
27 import jalview.datamodel.SearchResults;
28 import jalview.datamodel.SearchResultsI;
29 import jalview.datamodel.SequenceGroup;
30 import jalview.datamodel.SequenceI;
31 import jalview.util.Comparison;
32 import jalview.util.MapList;
34 import java.util.ArrayList;
35 import java.util.Arrays;
36 import java.util.Iterator;
37 import java.util.List;
39 import com.stevesoft.pat.Regex;
42 * Implements the search algorithm for the Find dialog
44 public class Finder implements FinderI
47 * matched residue locations
49 private SearchResultsI searchResults;
52 * sequences matched by id or description
54 private List<SequenceI> idMatches;
57 * the viewport to search over
59 private AlignViewportI viewport;
62 * sequence index in alignment to search from
64 private int sequenceIndex;
67 * position offset in sequence to search from, base 0
68 * (position after start of last match for a 'find next')
70 private int residueIndex;
73 * the true sequence position of the start of the
74 * last sequence searched (when 'ignore hidden regions' does not apply)
76 private int searchedSequenceStartPosition;
79 * when 'ignore hidden regions' applies, this holds the mapping from
80 * the visible sequence positions (1, 2, ...) to true sequence positions
82 private MapList searchedSequenceMap;
84 private String seqToSearch;
87 * Constructor for searching a viewport
91 public Finder(AlignViewportI av)
94 this.sequenceIndex = 0;
95 this.residueIndex = -1;
99 public void findAll(String theSearchString, boolean matchCase,
100 boolean searchDescription, boolean ignoreHidden)
103 * search from the start
108 doFind(theSearchString, matchCase, searchDescription, true,
112 * reset to start for next search
119 public void findNext(String theSearchString, boolean matchCase,
120 boolean searchDescription, boolean ignoreHidden)
122 doFind(theSearchString, matchCase, searchDescription, false,
125 if (searchResults.isEmpty() && idMatches.isEmpty())
128 * search failed - reset to start for next search
136 * Performs a 'find next' or 'find all'
138 * @param theSearchString
140 * @param searchDescription
142 * @param ignoreHidden
144 protected void doFind(String theSearchString, boolean matchCase,
145 boolean searchDescription, boolean findAll, boolean ignoreHidden)
147 searchResults = new SearchResults();
148 idMatches = new ArrayList<>();
150 String searchString = matchCase ? theSearchString
151 : theSearchString.toUpperCase();
152 Regex searchPattern = new Regex(searchString);
153 searchPattern.setIgnoreCase(!matchCase);
155 SequenceGroup selection = viewport.getSelectionGroup();
156 if (selection != null && selection.getSize() < 1)
158 selection = null; // ? ignore column-only selection
161 AlignmentI alignment = viewport.getAlignment();
162 int end = alignment.getHeight();
164 getSequence(ignoreHidden);
166 boolean found = false;
167 while (!found || findAll)
169 found = findNextMatch(searchString, searchPattern, searchDescription,
171 if (sequenceIndex >= end)
179 * Calculates and saves the sequence string to search. The string is restricted
180 * to the current selection region if there is one, and is saved with all gaps
183 * If there are hidden columns, and option {@ignoreHidden} is selected, then
184 * only visible positions of the sequence are included, and a mapping is also
185 * constructed from the returned string positions to the true sequence
188 * Note we have to do this each time {@code findNext} or {@code findAll} is
189 * called, in case the alignment, selection group or hidden columns have
190 * changed. In particular, if the sequence at offset {@code sequenceIndex} in
191 * the alignment is (no longer) in the selection group, search is advanced to
192 * the next sequence that is.
194 * Sets sequence string to the empty string if there are no more sequences (in
195 * selection group if any) at or after {@code sequenceIndex}.
197 * Returns true if a sequence could be found, false if end of alignment was
200 * @param ignoreHidden
203 private boolean getSequence(boolean ignoreHidden)
205 AlignmentI alignment = viewport.getAlignment();
206 if (sequenceIndex >= alignment.getHeight())
211 SequenceI seq = alignment.getSequenceAt(sequenceIndex);
212 SequenceGroup selection = viewport.getSelectionGroup();
213 if (selection != null && !selection.contains(seq))
215 if (!nextSequence(ignoreHidden))
219 seq = alignment.getSequenceAt(sequenceIndex);
222 String seqString = null;
225 seqString = getVisibleSequence(seq);
226 this.searchedSequenceStartPosition = 1;
231 int endCol = seq.getLength() - 1;
232 this.searchedSequenceStartPosition = seq.getStart();
233 if (selection != null)
235 startCol = selection.getStartRes();
236 endCol = Math.min(endCol, selection.getEndRes());
237 this.searchedSequenceStartPosition = seq.findPosition(startCol);
239 seqString = seq.getSequenceAsString(startCol, endCol + 1);
243 * remove gaps; note that even if this leaves an empty string, we 'search'
244 * the sequence anyway (for possible match on name or description)
246 String ungapped = AlignSeq.extractGaps(Comparison.GapChars, seqString);
247 this.seqToSearch = ungapped;
253 * Returns a string consisting of only the visible residues of {@code seq} from
254 * alignment column {@ fromColumn}, restricted to the current selection region
257 * As a side-effect, also computes the mapping from the true sequence positions
258 * to the positions (1, 2, ...) of the returned sequence. This is to allow
259 * search matches in the visible sequence to be converted to sequence positions.
264 private String getVisibleSequence(SequenceI seq)
267 * get start / end columns of sequence and convert to base 0
268 * (so as to match the visible column ranges)
270 int seqStartCol = seq.findIndex(seq.getStart()) - 1;
271 int seqEndCol = seq.findIndex(seq.getStart() + seq.getLength() - 1) - 1;
272 Iterator<int[]> visibleColumns = viewport.getViewAsVisibleContigs(true);
273 StringBuilder visibleSeq = new StringBuilder(seqEndCol - seqStartCol);
274 List<int[]> fromRanges = new ArrayList<>();
276 while (visibleColumns.hasNext())
278 int[] range = visibleColumns.next();
279 if (range[0] > seqEndCol)
281 // beyond the end of the sequence
284 if (range[1] < seqStartCol)
286 // before the start of the sequence
289 String subseq = seq.getSequenceAsString(range[0], range[1] + 1);
290 String ungapped = AlignSeq.extractGaps(Comparison.GapChars, subseq);
291 visibleSeq.append(ungapped);
292 if (!ungapped.isEmpty())
295 * visible region includes at least one non-gap character,
296 * so add the range to the mapping being constructed
298 int seqResFrom = seq.findPosition(range[0]);
299 int seqResTo = seqResFrom + ungapped.length() - 1;
300 fromRanges.add(new int[] { seqResFrom, seqResTo });
305 * construct the mapping
306 * from: visible sequence positions 1..length
307 * to: true residue positions of the alignment sequence
309 List<int[]> toRange = Arrays
311 { 1, visibleSeq.length() });
312 searchedSequenceMap = new MapList(fromRanges, toRange, 1, 1);
314 return visibleSeq.toString();
318 * Advances the search to the next sequence in the alignment. Sequences not in
319 * the current selection group (if there is one) are skipped. The (sub-)sequence
320 * to be searched is extracted, gaps removed, and saved, or set to null if there
321 * are no more sequences to search.
323 * Returns true if a sequence could be found, false if end of alignment was
326 * @param ignoreHidden
328 private boolean nextSequence(boolean ignoreHidden)
333 return getSequence(ignoreHidden);
337 * Finds the next match in the given sequence, starting at offset
338 * {@code residueIndex}. Answers true if a match is found, else false.
340 * If a match is found, {@code residueIndex} is advanced to the position after
341 * the start of the matched region, ready for the next search.
343 * If no match is found, {@code sequenceIndex} is advanced ready to search the
347 * @param searchString
348 * @param searchPattern
349 * @param matchDescription
350 * @param ignoreHidden
353 protected boolean findNextMatch(String searchString,
354 Regex searchPattern, boolean matchDescription,
355 boolean ignoreHidden)
357 if (residueIndex < 0)
360 * at start of sequence; try find by residue number, in sequence id,
361 * or (optionally) in sequence description
363 if (doNonMotifSearches(searchString, searchPattern,
371 * search for next match in sequence string
373 int end = seqToSearch.length();
374 while (residueIndex < end)
376 boolean matched = searchPattern.searchFrom(seqToSearch, residueIndex);
379 if (recordMatch(searchPattern, ignoreHidden))
386 residueIndex = Integer.MAX_VALUE;
390 nextSequence(ignoreHidden);
395 * Adds the match held in the <code>searchPattern</code> Regex to the
396 * <code>searchResults</code>, unless it is a subregion of the last match
397 * recorded. <code>residueIndex</code> is advanced to the position after the
398 * start of the matched region, ready for the next search. Answers true if a
399 * match was added, else false.
401 * Matches that lie entirely within hidden regions of the alignment are not
404 * @param searchPattern
405 * @param ignoreHidden
408 protected boolean recordMatch(Regex searchPattern, boolean ignoreHidden)
410 SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
413 * convert start/end of the match to sequence coordinates
415 int offset = searchPattern.matchedFrom();
416 int matchStartPosition = this.searchedSequenceStartPosition + offset;
417 int matchEndPosition = matchStartPosition
418 + searchPattern.charsMatched() - 1;
421 * update residueIndex to next position after the start of the match
422 * (findIndex returns a value base 1, columnIndex is held base 0)
424 residueIndex += offset + 1;
427 * return false if the match is entirely in a hidden region
429 if (allHidden(seq, matchStartPosition, matchEndPosition))
435 * check that this match is not a subset of the previous one (JAL-2302)
437 List<SearchResultMatchI> matches = searchResults.getResults();
438 SearchResultMatchI lastMatch = matches.isEmpty() ? null
439 : matches.get(matches.size() - 1);
441 if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
444 addMatch(seq, matchStartPosition, matchEndPosition, ignoreHidden);
452 * Adds one match to the stored list. If hidden residues are being skipped, then
453 * the match may need to be split into contiguous positions of the sequence (so
454 * it does not include skipped residues).
457 * @param matchStartPosition
458 * @param matchEndPosition
459 * @param ignoreHidden
461 private void addMatch(SequenceI seq, int matchStartPosition,
462 int matchEndPosition, boolean ignoreHidden)
469 searchResults.addResult(seq, matchStartPosition, matchEndPosition);
474 * get start-end contiguous ranges in underlying sequence
476 int[] truePositions = searchedSequenceMap
477 .locateInFrom(matchStartPosition, matchEndPosition);
478 for (int i = 0; i < truePositions.length - 1; i += 2)
480 searchResults.addResult(seq, truePositions[i], truePositions[i + 1]);
485 * Returns true if all residues are hidden, else false
492 private boolean allHidden(SequenceI seq, int fromPos, int toPos)
494 if (!viewport.hasHiddenColumns())
498 for (int res = fromPos; res <= toPos; res++)
500 if (isVisible(seq, res))
509 * Does searches other than for residue patterns. Currently this includes
511 * <li>find residue by position (if search string is a number)</li>
512 * <li>match search string to sequence id</li>
513 * <li>match search string to sequence description (optional)</li>
515 * Answers true if a match is found, else false.
517 * @param searchString
518 * @param searchPattern
519 * @param includeDescription
522 protected boolean doNonMotifSearches(String searchString,
523 Regex searchPattern, boolean includeDescription)
525 SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
528 * position sequence search to start of sequence
533 int res = Integer.parseInt(searchString);
534 return searchForResidueNumber(seq, res);
535 } catch (NumberFormatException ex)
537 // search pattern is not a number
540 if (searchSequenceName(seq, searchPattern))
544 if (includeDescription && searchSequenceDescription(seq, searchPattern))
552 * Searches for a match with the sequence description, and if found, adds the
553 * sequence to the list of match ids (but not as a duplicate). Answers true if
554 * a match was added, else false.
557 * @param searchPattern
560 protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern)
562 String desc = seq.getDescription();
563 if (desc != null && searchPattern.search(desc) && !idMatches.contains(seq))
572 * Searches for a match with the sequence name, and if found, adds the
573 * sequence to the list of match ids (but not as a duplicate). Answers true if
574 * a match was added, else false.
577 * @param searchPattern
580 protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
582 if (searchPattern.search(seq.getName()) && !idMatches.contains(seq))
591 * If the residue position is valid for the sequence, and in a visible column,
592 * adds the position to the search results and returns true, else answers false.
598 protected boolean searchForResidueNumber(SequenceI seq, int resNo)
600 if (seq.getStart() <= resNo && seq.getEnd() >= resNo)
602 if (isVisible(seq, resNo))
604 searchResults.addResult(seq, resNo, resNo);
612 * Returns true if the residue is in a visible column, else false
618 private boolean isVisible(SequenceI seq, int res)
620 if (!viewport.hasHiddenColumns())
624 int col = seq.findIndex(res); // base 1
625 return viewport.getAlignment().getHiddenColumns().isVisible(col - 1); // base 0
629 public List<SequenceI> getIdMatches()
635 public SearchResultsI getSearchResults()
637 return searchResults;