2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.analysis;
23 import java.util.ArrayList;
24 import java.util.Arrays;
25 import java.util.Iterator;
26 import java.util.List;
27 import java.util.Locale;
29 import com.stevesoft.pat.Regex;
31 import jalview.api.AlignViewportI;
32 import jalview.api.FeatureRenderer;
33 import jalview.api.FinderI;
34 import jalview.datamodel.AlignmentI;
35 import jalview.datamodel.SearchResultMatchI;
36 import jalview.datamodel.SearchResults;
37 import jalview.datamodel.SearchResultsI;
38 import jalview.datamodel.SequenceFeature;
39 import jalview.datamodel.SequenceGroup;
40 import jalview.datamodel.SequenceI;
41 import jalview.datamodel.features.SequenceFeaturesI;
42 import jalview.util.Comparison;
43 import jalview.util.MapList;
46 * Implements the search algorithm for the Find dialog
48 public class Finder implements FinderI
51 * matched residue locations
53 private SearchResultsI searchResults;
56 * sequences matched by id or description
58 private List<SequenceI> idMatches;
61 * the viewport to search over
63 private AlignViewportI viewport;
66 * feature renderer model - if available
68 FeatureRenderer frm = null;
71 * sequence index in alignment to search from
73 private int sequenceIndex;
76 * position offset in sequence to search from, base 0
77 * (position after start of last match for a 'find next')
79 private int residueIndex;
82 * last feature matched when incrementally searching sequence features
84 private SequenceFeature lastFeature;
87 * last sequenceIndex used when lastFeature was discovered
89 private int lastFeatureSequenceIndex;
92 * the true sequence position of the start of the
93 * last sequence searched (when 'ignore hidden regions' does not apply)
95 private int searchedSequenceStartPosition;
98 * when 'ignore hidden regions' applies, this holds the mapping from
99 * the visible sequence positions (1, 2, ...) to true sequence positions
101 private MapList searchedSequenceMap;
103 private String seqToSearch;
106 * Constructor for searching a viewport
110 public Finder(AlignViewportI av)
113 this.sequenceIndex = 0;
114 this.residueIndex = -1;
118 public void findAll(String theSearchString, boolean matchCase,
119 boolean searchDescription, boolean searchFeatureDesc,
120 boolean ignoreHidden)
123 * search from the start
126 lastFeatureSequenceIndex = 0;
130 doFind(theSearchString, matchCase, searchDescription, searchFeatureDesc,
134 * reset to start for next search
139 lastFeatureSequenceIndex = 0;
143 public void findNext(String theSearchString, boolean matchCase,
144 boolean searchDescription, boolean searchFeatureDesc,
145 boolean ignoreHidden)
147 doFind(theSearchString, matchCase, searchDescription, searchFeatureDesc,
148 false, ignoreHidden);
150 if (searchResults.isEmpty() && idMatches.isEmpty())
153 * search failed - reset to start for next search
158 lastFeatureSequenceIndex = 0;
163 * Performs a 'find next' or 'find all'
165 * @param theSearchString
167 * @param searchDescription
169 * @param ignoreHidden
171 protected void doFind(String theSearchString, boolean matchCase,
172 boolean searchDescription, boolean searchFeatureDesc,
173 boolean findAll, boolean ignoreHidden)
175 searchResults = new SearchResults();
176 idMatches = new ArrayList<>();
178 String searchString = matchCase ? theSearchString
179 : theSearchString.toUpperCase(Locale.ROOT);
180 Regex searchPattern = new Regex(searchString);
181 searchPattern.setIgnoreCase(!matchCase);
183 SequenceGroup selection = viewport.getSelectionGroup();
184 if (selection != null && selection.getSize() < 1)
186 selection = null; // ? ignore column-only selection
189 AlignmentI alignment = viewport.getAlignment();
190 int end = alignment.getHeight();
192 getSequence(ignoreHidden);
194 boolean found = false;
195 while ((!found || findAll) && sequenceIndex < end)
197 found = findNextMatch(searchString, searchPattern, searchDescription,
198 searchFeatureDesc, ignoreHidden);
203 * Calculates and saves the sequence string to search. The string is
204 * restricted to the current selection region if there is one, and is saved
205 * with all gaps removed.
* If there are hidden columns, and option {@code ignoreHidden} is selected, then
208 * only visible positions of the sequence are included, and a mapping is also
209 * constructed from the returned string positions to the true sequence
212 * Note we have to do this each time {@code findNext} or {@code findAll} is
213 * called, in case the alignment, selection group or hidden columns have
214 * changed. In particular, if the sequence at offset {@code sequenceIndex} in
215 * the alignment is (no longer) in the selection group, search is advanced to
216 * the next sequence that is.
218 * Sets sequence string to the empty string if there are no more sequences (in
219 * selection group if any) at or after {@code sequenceIndex}.
221 * Returns true if a sequence could be found, false if end of alignment was
224 * @param ignoreHidden
227 private boolean getSequence(boolean ignoreHidden)
229 AlignmentI alignment = viewport.getAlignment();
230 if (sequenceIndex >= alignment.getHeight())
235 SequenceI seq = alignment.getSequenceAt(sequenceIndex);
236 SequenceGroup selection = viewport.getSelectionGroup();
237 if (selection != null && !selection.contains(seq))
239 if (!nextSequence(ignoreHidden))
243 seq = alignment.getSequenceAt(sequenceIndex);
246 String seqString = null;
249 seqString = getVisibleSequence(seq);
250 this.searchedSequenceStartPosition = 1;
255 int endCol = seq.getLength() - 1;
256 this.searchedSequenceStartPosition = seq.getStart();
257 if (selection != null)
259 startCol = selection.getStartRes();
260 endCol = Math.min(endCol, selection.getEndRes());
261 this.searchedSequenceStartPosition = seq.findPosition(startCol);
263 seqString = seq.getSequenceAsString(startCol, endCol + 1);
267 * remove gaps; note that even if this leaves an empty string, we 'search'
268 * the sequence anyway (for possible match on name or description)
270 String ungapped = AlignSeq.extractGaps(Comparison.GapChars, seqString);
271 this.seqToSearch = ungapped;
* Returns a string consisting of only the visible residues of {@code seq},
* restricted to the current selection region if there is one.
281 * As a side-effect, also computes the mapping from the true sequence
282 * positions to the positions (1, 2, ...) of the returned sequence. This is to
283 * allow search matches in the visible sequence to be converted to sequence
289 private String getVisibleSequence(SequenceI seq)
292 * get start / end columns of sequence and convert to base 0
293 * (so as to match the visible column ranges)
295 int seqStartCol = seq.findIndex(seq.getStart()) - 1;
296 int seqEndCol = seq.findIndex(seq.getStart() + seq.getLength() - 1) - 1;
297 Iterator<int[]> visibleColumns = viewport.getViewAsVisibleContigs(true);
298 StringBuilder visibleSeq = new StringBuilder(seqEndCol - seqStartCol);
299 List<int[]> fromRanges = new ArrayList<>();
301 while (visibleColumns.hasNext())
303 int[] range = visibleColumns.next();
304 if (range[0] > seqEndCol)
306 // beyond the end of the sequence
309 if (range[1] < seqStartCol)
311 // before the start of the sequence
314 String subseq = seq.getSequenceAsString(range[0], range[1] + 1);
315 String ungapped = AlignSeq.extractGaps(Comparison.GapChars, subseq);
316 visibleSeq.append(ungapped);
317 if (!ungapped.isEmpty())
320 * visible region includes at least one non-gap character,
321 * so add the range to the mapping being constructed
323 int seqResFrom = seq.findPosition(range[0]);
324 int seqResTo = seqResFrom + ungapped.length() - 1;
325 fromRanges.add(new int[] { seqResFrom, seqResTo });
330 * construct the mapping
* from: true residue positions of the alignment sequence
* to: visible sequence positions 1..length
334 List<int[]> toRange = Arrays
336 { 1, visibleSeq.length() });
337 searchedSequenceMap = new MapList(fromRanges, toRange, 1, 1);
339 return visibleSeq.toString();
343 * Advances the search to the next sequence in the alignment. Sequences not in
344 * the current selection group (if there is one) are skipped. The
* (sub-)sequence to be searched is extracted, gaps removed, and saved, or set
* to the empty string if there are no more sequences to search.
348 * Returns true if a sequence could be found, false if end of alignment was
351 * @param ignoreHidden
353 private boolean nextSequence(boolean ignoreHidden)
358 return getSequence(ignoreHidden);
362 * Finds the next match in the given sequence, starting at offset
363 * {@code residueIndex}. Answers true if a match is found, else false.
365 * If a match is found, {@code residueIndex} is advanced to the position after
366 * the start of the matched region, ready for the next search.
368 * If no match is found, {@code sequenceIndex} is advanced ready to search the
372 * @param searchString
373 * @param searchPattern
374 * @param matchDescription
375 * @param ignoreHidden
378 protected boolean findNextMatch(String searchString, Regex searchPattern,
379 boolean matchDescription, boolean matchFeatureDesc,
380 boolean ignoreHidden)
382 if (residueIndex < 0)
385 * at start of sequence; try find by residue number, in sequence id,
386 * or (optionally) in sequence description
388 if (doNonMotifSearches(searchString, searchPattern, matchDescription))
395 * search for next match in sequence string
397 int end = seqToSearch.length();
398 while (residueIndex < end)
400 boolean matched = searchPattern.searchFrom(seqToSearch, residueIndex);
403 if (recordMatch(searchPattern, ignoreHidden))
410 if (matchFeatureDesc)
412 matched = searchSequenceFeatures(residueIndex, searchPattern);
419 residueIndex = Integer.MAX_VALUE;
423 nextSequence(ignoreHidden);
428 * Adds the match held in the <code>searchPattern</code> Regex to the
429 * <code>searchResults</code>, unless it is a subregion of the last match
430 * recorded. <code>residueIndex</code> is advanced to the position after the
431 * start of the matched region, ready for the next search. Answers true if a
432 * match was added, else false.
434 * Matches that lie entirely within hidden regions of the alignment are not
437 * @param searchPattern
438 * @param ignoreHidden
441 protected boolean recordMatch(Regex searchPattern, boolean ignoreHidden)
443 SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
446 * convert start/end of the match to sequence coordinates
448 int offset = searchPattern.matchedFrom();
449 int matchStartPosition = this.searchedSequenceStartPosition + offset;
450 int matchEndPosition = matchStartPosition + searchPattern.charsMatched()
454 * update residueIndex to next position after the start of the match
* (matchedFrom is a base 0 offset into the searched string, as is residueIndex)
457 residueIndex = searchPattern.matchedFrom() + 1;
460 * return false if the match is entirely in a hidden region
462 if (allHidden(seq, matchStartPosition, matchEndPosition))
468 * check that this match is not a subset of the previous one (JAL-2302)
470 List<SearchResultMatchI> matches = searchResults.getResults();
471 SearchResultMatchI lastMatch = matches.isEmpty() ? null
472 : matches.get(matches.size() - 1);
474 if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
477 addMatch(seq, matchStartPosition, matchEndPosition, ignoreHidden);
485 * Adds one match to the stored list. If hidden residues are being skipped,
486 * then the match may need to be split into contiguous positions of the
487 * sequence (so it does not include skipped residues).
490 * @param matchStartPosition
491 * @param matchEndPosition
492 * @param ignoreHidden
494 private void addMatch(SequenceI seq, int matchStartPosition,
495 int matchEndPosition, boolean ignoreHidden)
502 searchResults.addResult(seq, matchStartPosition, matchEndPosition);
507 * get start-end contiguous ranges in underlying sequence
509 int[] truePositions = searchedSequenceMap
510 .locateInFrom(matchStartPosition, matchEndPosition);
511 searchResults.addResult(seq, truePositions);
515 * Returns true if all residues are hidden, else false
522 private boolean allHidden(SequenceI seq, int fromPos, int toPos)
524 if (!viewport.hasHiddenColumns())
528 for (int res = fromPos; res <= toPos; res++)
530 if (isVisible(seq, res))
539 * Does searches other than for residue patterns. Currently this includes
541 * <li>find residue by position (if search string is a number)</li>
542 * <li>match search string to sequence id</li>
543 * <li>match search string to sequence description (optional)</li>
545 * Answers true if a match is found, else false.
547 * @param searchString
548 * @param searchPattern
549 * @param includeDescription
552 protected boolean doNonMotifSearches(String searchString,
553 Regex searchPattern, boolean includeDescription)
555 SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
558 * position sequence search to start of sequence
563 int res = Integer.parseInt(searchString);
564 return searchForResidueNumber(seq, res);
565 } catch (NumberFormatException ex)
567 // search pattern is not a number
570 if (searchSequenceName(seq, searchPattern))
574 if (includeDescription && searchSequenceDescription(seq, searchPattern))
* Searches the type and description of the sequence's features for a match,
* and if one is found, adds the feature's begin-end range to the search
* results. Answers true if a match was added, else false.
587 * @param searchPattern
590 protected boolean searchSequenceFeatures(int from, Regex searchPattern)
592 if (lastFeatureSequenceIndex != sequenceIndex)
594 lastFeatureSequenceIndex = sequenceIndex;
597 SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
598 SequenceFeaturesI sf = seq.getFeatures();
600 // TODO - stash feature list and search incrementally
601 List<SequenceFeature> allFeatures = null;
604 allFeatures = frm.findFeaturesAtResidue(seq, seq.getStart(),
609 allFeatures = sf.getAllFeatures(null);
611 // so we can check we are advancing when debugging
614 for (SequenceFeature feature : allFeatures)
617 if (lastFeature != null)
619 // iterate till we find last feature matched
620 if (lastFeature != feature)
631 if (searchPattern.search(feature.type) || (feature.description != null
632 && searchPattern.search(feature.description)))
634 searchResults.addResult(seq, feature.getBegin(), feature.getEnd());
635 lastFeature = feature;
639 residueIndex = Integer.MAX_VALUE;
645 * Searches for a match with the sequence description, and if found, adds the
646 * sequence to the list of match ids (but not as a duplicate). Answers true if
647 * a match was added, else false.
650 * @param searchPattern
653 protected boolean searchSequenceDescription(SequenceI seq,
656 String desc = seq.getDescription();
657 if (desc != null && searchPattern.search(desc)
658 && !idMatches.contains(seq))
667 * Searches for a match with the sequence name, and if found, adds the
668 * sequence to the list of match ids (but not as a duplicate). Answers true if
669 * a match was added, else false.
672 * @param searchPattern
675 protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
677 if (searchPattern.search(seq.getName()) && !idMatches.contains(seq))
686 * If the residue position is valid for the sequence, and in a visible column,
687 * adds the position to the search results and returns true, else answers
694 protected boolean searchForResidueNumber(SequenceI seq, int resNo)
696 if (seq.getStart() <= resNo && seq.getEnd() >= resNo)
698 if (isVisible(seq, resNo))
700 searchResults.addResult(seq, resNo, resNo);
708 * Returns true if the residue is in a visible column, else false
714 private boolean isVisible(SequenceI seq, int res)
716 if (!viewport.hasHiddenColumns())
720 int col = seq.findIndex(res); // base 1
721 return viewport.getAlignment().getHiddenColumns().isVisible(col - 1); // base
726 public List<SequenceI> getIdMatches()
732 public SearchResultsI getSearchResults()
734 return searchResults;
738 public void setFeatureRenderer(FeatureRenderer featureRenderer)
740 frm = featureRenderer;