/* * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Jalview. If not, see . * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.analysis; import jalview.datamodel.AlignmentI; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResultsI; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.datamodel.VisibleContigsIterator; import jalview.util.Comparison; import java.util.List; import java.util.Vector; import com.stevesoft.pat.Regex; /** * Implements the search algorithm for the Find dialog */ public class Finder { /* * matched residue locations */ private SearchResultsI searchResults; /* * sequences matched by id or description */ private Vector idMatch; /* * the alignment to search over */ private AlignmentI alignment; /* * (optional) selection to restrict search to */ private SequenceGroup selection; /* * set true for case-sensitive search (default is false) */ private boolean caseSensitive; /* * set true to search sequence description (default is false) */ private boolean includeDescription; /* * set true to return all matches (default is next match only) */ private boolean findAll; /* * sequence index in alignment to search from */ private int sequenceIndex; /* * column position in sequence to search from, base 0 * - absolute column number including any hidden columns * (position of last match for a repeat search) */ private int columnIndex; /** * Constructor to start searching an alignment, optionally restricting results * to a selection * * @param al * @param sel */ public Finder(AlignmentI al, SequenceGroup sel) { this(al, sel, 0, -1); } /** * Constructor to resume search at given sequence and residue on alignment and * (optionally) restricted to a selection * * @param al * @param sel * @param seqindex * @param colindex */ public Finder(AlignmentI al, SequenceGroup sel, int seqindex, int colindex) { this.alignment = al; this.selection = sel; this.sequenceIndex = seqindex; this.columnIndex = colindex; } /** * Performs a find for the given search string. By default the next match is * found, but if setFindAll(true) has been called, then all matches are found. * Sequences matched by id or description can be retrieved by getIdMatch(), * and matched residue patterns by getSearchResults(). * * @param theSearchString * @return */ public void find(String theSearchString) { if (findAll) { sequenceIndex = 0; columnIndex = -1; } String searchString = caseSensitive ? theSearchString : theSearchString.toUpperCase(); Regex searchPattern = new Regex(searchString); searchPattern.setIgnoreCase(!caseSensitive); searchResults = new SearchResults(); idMatch = new Vector<>(); if (selection != null && selection.getSize() < 1) { selection = null; // ? ignore column-only selection } int end = alignment.getHeight(); while (sequenceIndex < end) { SequenceI seq = alignment.getSequenceAt(sequenceIndex); boolean found = findNext(seq, searchString, searchPattern); if (found && !findAll) { return; } if (!found) { sequenceIndex++; columnIndex = -1; } } } /** * Answers the start-end column range of the visible region starting at or * after the given column. if there are no hidden columns, this just returns * the remaining width of the alignment. Answers null if there are no visible * columns at or after column. */ protected int[] getNextVisibleRegion(int column) { VisibleContigsIterator visibleRegions = alignment.getHiddenColumns() .getVisContigsIterator(column, alignment.getWidth(), false); return visibleRegions.hasNext() ? visibleRegions.next() : null; } /** * Finds the next match in the given sequence, starting at column at * columnIndex. Answers true if a match is found, else false. If * a match is found, columnIndex is advanced to the column after * the start of the matched region, ready for a search from the next position. * * @param seq * @param searchString * @param searchPattern * @return */ protected boolean findNext(SequenceI seq, String searchString, Regex searchPattern) { if (selection != null && !selection.contains(seq)) { /* * this sequence is not in the selection - advance to next sequence */ return false; } if (columnIndex < 0) { /* * at start of sequence; try find by residue number, in sequence id, * or (optionally) in sequence description */ if (doNonMotifSearches(seq, searchString, searchPattern)) { return true; } } /* * search for next match in sequence string */ int end = seq.getLength(); while (columnIndex < end) { if (searchNextVisibleRegion(seq, searchPattern)) { return true; } } return false; } /** * Searches the sequence, starting from columnIndex, and adds the * next match (if any) to searchResults. The search is restricted * to the next visible column region, and to the selection region * if there is one. Answers true if a match is added, else false. * * @param seq * @param searchPattern * @return */ protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern) { /* * sequence columns to search (working in absolute column * positions, base 0, including any hidden columns) */ int seqColStart = columnIndex; int seqColEnd = seq.getLength() - 1; /* * restrict search to (next) visible column region, * in case there are hidden columns */ int[] visible = getNextVisibleRegion(columnIndex); if (visible != null) { seqColStart = Math.max(seqColStart, visible[0]); seqColEnd = Math.min(seqColEnd, visible[1]); } else { columnIndex = seqColEnd + 1; return false; } /* * restrict search to selected region if there is one */ if (selection != null) { int selectionStart = selection.getStartRes(); int selectionEnd = selection.getEndRes(); if (selectionStart > seqColEnd || selectionEnd < seqColStart) { /* * sequence region doesn't overlap selection region - * no match, advance to next visible region */ columnIndex = seqColEnd + 1; return false; } seqColStart = Math.max(seqColStart, selectionStart); seqColEnd = Math.min(seqColEnd, selectionEnd); } String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1); String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString); if (searchPattern.search(noGaps)) { int sequenceStartPosition = seq.findPosition(seqColStart); recordMatch(seq, searchPattern, sequenceStartPosition); return true; } else { /* * no match - advance columnIndex past this visible region * so the next visible region (if any) is searched next */ columnIndex = seqColEnd + 1; } return false; } /** * Adds the match held in the searchPattern Regex to the * searchResults, unless it is a subregion of the last match * recorded. columnIndex is advanced to the position after the * start of the matched region, ready for the next search. Answers true if a * match was added, else false. * * @param seq * @param searchPattern * @param firstResiduePosition * @return */ protected boolean recordMatch(SequenceI seq, Regex searchPattern, int firstResiduePosition) { /* * get start/end of the match in sequence coordinates */ int offset = searchPattern.matchedFrom(); int matchStartPosition = firstResiduePosition + offset; int matchEndPosition = matchStartPosition + searchPattern.charsMatched() - 1; /* * update columnIndex to next column after the start of the match * (findIndex returns a value base 1, columnIndex is held base 0) */ columnIndex = seq.findIndex(matchStartPosition); /* * check that this match is not a subset of the previous one (JAL-2302) */ List matches = searchResults.getResults(); SearchResultMatchI lastMatch = matches.isEmpty() ? null : matches.get(matches.size() - 1); if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition, matchEndPosition)) { searchResults.addResult(seq, matchStartPosition, matchEndPosition); return true; } return false; } /** * Does searches other than for residue patterns. Currently this includes *
    *
  • find residue by position (if search string is a number)
  • *
  • match search string to sequence id
  • *
  • match search string to sequence description (optional)
  • *
* Answers true if a match is found, else false. * * @param seq * @param searchString * @param searchPattern * @return */ protected boolean doNonMotifSearches(SequenceI seq, String searchString, Regex searchPattern) { /* * position sequence search to start of sequence */ columnIndex = 0; if (searchForResidueNumber(seq, searchString)) { return true; } if (searchSequenceName(seq, searchPattern)) { return true; } if (includeDescription && searchSequenceDescription(seq, searchPattern)) { return true; } return false; } /** * Searches for a match with the sequence description, and if found, adds the * sequence to the list of match ids (but not as a duplicate). Answers true if * a match was added, else false. * * @param seq * @param searchPattern * @return */ protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern) { String desc = seq.getDescription(); if (desc != null && searchPattern.search(desc) && !idMatch.contains(seq)) { idMatch.addElement(seq); return true; } return false; } /** * Searches for a match with the sequence name, and if found, adds the * sequence to the list of match ids (but not as a duplicate). Answers true if * a match was added, else false. * * @param seq * @param searchPattern * @return */ protected boolean searchSequenceName(SequenceI seq, Regex searchPattern) { if (searchPattern.search(seq.getName()) && !idMatch.contains(seq)) { idMatch.addElement(seq); return true; } return false; } /** * Tries to interpret the search string as a residue position, and if valid, * adds the position to the search results and returns true, else answers * false */ protected boolean searchForResidueNumber(SequenceI seq, String searchString) { try { int res = Integer.parseInt(searchString); if (seq.getStart() <= res && seq.getEnd() >= res) { searchResults.addResult(seq, res, res); return true; } } catch (NumberFormatException ex) { } return false; } /** * Sets whether the search is case sensitive (default is no) * * @param value */ public void setCaseSensitive(boolean value) { this.caseSensitive = value; } /** * Sets whether search returns all matches. Default is to return the next * match only. * * @param value */ public void setFindAll(boolean value) { this.findAll = value; } /** * Returns the (possibly empty) list of sequences matched on sequence name or * description * * @return */ public Vector getIdMatch() { return idMatch; } /** * Answers the search results (possibly empty) from the last search * * @return */ public SearchResultsI getSearchResults() { return searchResults; } /** * Answers the absolute column position (base 0, including any hidden columns) * of the start of the last sequence motif (residue pattern) match found. A * 'Find next' will search from the next position. * * @return */ public int getColumnIndex() { return columnIndex; } /** * Answers the offset in the alignment (0..) of the sequence in which the last * match was found (if any) * * @return */ public int getSequenceIndex() { return sequenceIndex; } /** * Sets whether search also searches in sequence description text (default is * no) * * @param value */ public void setIncludeDescription(boolean value) { this.includeDescription = value; } }