/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 *
 * This file is part of Jalview.
 *
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Jalview.  If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
package jalview.analysis;

import jalview.datamodel.AlignmentI;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.util.Comparison;

import java.util.Vector;

import com.stevesoft.pat.Regex;

/**
 * Implements the search algorithm for the Find dialog. A search can match
 * residue patterns within sequences, a residue number, the sequence name and
 * (optionally) the sequence description.
 */
public class Finder {
  /*
   * matched residue locations
   */
  private SearchResultsI searchResults;

  /*
   * sequences matched by id or description
   */
  private Vector<SequenceI> idMatch;

  /*
   * the alignment to search over
   */
  private AlignmentI alignment;

  /*
   * (optional) selection to restrict search to
   */
  private SequenceGroup selection;

  /*
   * set true for case-sensitive search (default is false)
   */
  private boolean caseSensitive;

  /*
   * set true to search sequence description (default is false)
   */
  private boolean includeDescription;

  /*
   * set true to return all matches (default is next match only)
   */
  private boolean findAll;

  /*
   * sequence index in alignment to search from
   */
  private int seqIndex;

  /*
   * residue index in the (ungapped) sequence to search from; after a
   * 'find next' match this is the base 1 index of the first matched residue,
   * so that a repeat search resumes just after it. A negative value means
   * 'at start of sequence'.
   */
  private int resIndex;

  /**
   * Constructor to start searching an alignment, optionally restricting
   * results to a selection
   *
   * @param al
   *          the alignment to search
   * @param sel
   *          the selection to restrict the search to (may be null)
   */
  public Finder(AlignmentI al, SequenceGroup sel) {
    this(al, sel, 0, -1);
  }

  /**
   * Constructor to resume search at given sequence and residue on alignment
   * and (optionally) restricted to a selection
   *
   * @param al
   *          the alignment to search
   * @param sel
   *          the selection to restrict the search to (may be null)
   * @param seqindex
   *          index in the alignment of the sequence to search from
   * @param resindex
   *          residue index to search from (as returned by getResIndex()), or
   *          a negative value to start at the beginning of the sequence
   */
  public Finder(AlignmentI al, SequenceGroup sel, int seqindex,
          int resindex) {
    this.alignment = al;
    this.selection = sel;
    this.seqIndex = seqindex;
    this.resIndex = resindex;
  }

  /**
   * Performs a find for the given search string. By default the next match is
   * found, but if setFindAll(true) has been called, then all matches are
   * found. Sequences matched by id or description can be retrieved by
   * getIdMatch(), and matched residue patterns by getSearchResults().
   *
   * @param theSearchString
   *          the text or regular expression to search for
   */
  public void find(String theSearchString) {
    /*
     * the pattern is used exactly as entered: a case-sensitive search must
     * not have its case altered, and a case-insensitive search is handled by
     * the regex's ignore-case flag
     */
    String searchString = theSearchString;
    Regex regex = new Regex(searchString);
    regex.setIgnoreCase(!caseSensitive);

    searchResults = new SearchResults();
    idMatch = new Vector<>();

    if (selection != null && selection.getSize() < 1) {
      // selection holds no sequences (column-only selection): ignore it
      selection = null;
    }

    boolean finished = false;
    int end = alignment.getHeight();

    while (!finished && seqIndex < end) {
      SequenceI seq = alignment.getSequenceAt(seqIndex);

      if (selection != null && !selection.contains(seq)) {
        // this sequence is not in the selection - skip to next sequence
        seqIndex++;
        resIndex = -1;
        continue;
      }

      if (resIndex < 0) {
        /*
         * at start of sequence; try find by residue number, in sequence id,
         * or (optionally) in sequence description
         */
        resIndex = 0;
        if (doNonMotifSearches(seq, searchString, regex)) {
          return;
        }
      }

      finished = searchSequenceString(seq, regex) && !findAll;
      if (!finished) {
        // nothing (more) to report for this sequence: move to the next one
        seqIndex++;
        resIndex = -1;
      }
    }
  }

  /**
   * Searches the sequence, starting from resIndex, and adds matches to
   * searchResults. The search is restricted to the selection region if there
   * is one. Answers true if any match is added, else false.
   * <p>
   * If not doing 'find all', the method returns at the first match, leaving
   * resIndex positioned so that a repeat search resumes after the start of
   * that match.
   *
   * @param seq
   *          the sequence to search
   * @param regex
   *          the compiled search pattern
   * @return true if a match was added to the search results
   */
  protected boolean searchSequenceString(SequenceI seq, Regex regex) {
    /*
     * Restrict search to selected region if there is one
     */
    int seqColStart = 0;
    int seqColEnd = seq.getLength() - 1;
    int residueOffset = 0;
    if (selection != null) {
      int selColEnd = selection.getEndRes();
      int selColStart = selection.getStartRes();
      if (selColStart > seqColEnd) {
        return false; // sequence doesn't reach selection region
      }
      seqColStart = selColStart;
      seqColEnd = Math.min(seqColEnd, selColEnd);
      // number of residues in the sequence that precede the selected region
      residueOffset = seq.findPosition(selColStart) - seq.getStart();
    }
    String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1);
    String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);

    SearchResultMatchI lastMatch = null;
    boolean found = false;

    /*
     * resIndex counts residues from the start of the whole sequence, whereas
     * noGaps holds only the residues of the searched region; convert to an
     * index into noGaps (base 0) before searching
     */
    int searchFrom = Math.max(0, resIndex - residueOffset);

    while (searchFrom < noGaps.length()
            && regex.searchFrom(noGaps, searchFrom)) {
      int matchedAt = regex.matchedFrom();

      // add back #residues before selection region
      resIndex = matchedAt + residueOffset;
      int matchStartPosition = resIndex + seq.getStart();
      int matchEndPosition = matchStartPosition + regex.charsMatched() - 1;

      // do not report a match wholly contained in the previous one
      if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
              matchEndPosition)) {
        lastMatch = searchResults.addResult(seq, matchStartPosition,
                matchEndPosition);
        found = true;
      }

      if (!findAll) {
        // leave resIndex just past the match start, ready for 'find next'
        resIndex++;
        return true;
      }

      // continue from the residue after the start of this match
      searchFrom = matchedAt + 1;
    }
    return found;
  }

  /**
   * Does searches other than for residue patterns. Currently this includes
   * <ul>
   * <li>find residue by position (if search string is a number)</li>
   * <li>match search string to sequence id</li>
   * <li>match search string to sequence description (optional)</li>
   * </ul>
   * Answers true if a match is found and we are not doing 'find all' (so this
   * search action is complete), else false.
   *
   * @param seq
   *          the sequence to inspect
   * @param searchString
   *          the search text as entered
   * @param regex
   *          the compiled search pattern
   * @return true if the search is complete
   */
  protected boolean doNonMotifSearches(SequenceI seq, String searchString,
          Regex regex) {
    if (searchForResidueNumber(seq, searchString) && !findAll) {
      return true;
    }
    if (searchSequenceName(seq, regex) && !findAll) {
      return true;
    }
    if (searchSequenceDescription(seq, regex) && !findAll) {
      return true;
    }
    return false;
  }

  /**
   * Searches for a match with the sequence description, if that option was
   * requested, and if found, adds the sequence to the list of match ids (but
   * not as a duplicate). Answers true if a match was added, else false.
   *
   * @param seq
   *          the sequence whose description is searched
   * @param regex
   *          the compiled search pattern
   * @return true if the sequence was added to the id matches
   */
  protected boolean searchSequenceDescription(SequenceI seq, Regex regex) {
    if (!includeDescription) {
      return false;
    }
    String desc = seq.getDescription();
    if (desc != null && regex.search(desc) && !idMatch.contains(seq)) {
      idMatch.addElement(seq);
      return true;
    }
    return false;
  }

  /**
   * Searches for a match with the sequence name, and if found, adds the
   * sequence to the list of match ids (but not as a duplicate). Answers true
   * if a match was added, else false.
   *
   * @param seq
   *          the sequence whose name is searched
   * @param regex
   *          the compiled search pattern
   * @return true if the sequence was added to the id matches
   */
  protected boolean searchSequenceName(SequenceI seq, Regex regex) {
    if (regex.search(seq.getName()) && !idMatch.contains(seq)) {
      idMatch.addElement(seq);
      return true;
    }
    return false;
  }

  /**
   * Tries to interpret the search string as a residue position, and if valid
   * (within the sequence's start-end range), adds the position to the search
   * results
   *
   * @param seq
   *          the sequence to check the position against
   * @param searchString
   *          the search text as entered
   * @return true if a residue position was added to the search results
   */
  protected boolean searchForResidueNumber(SequenceI seq,
          String searchString) {
    try {
      int res = Integer.parseInt(searchString);
      if (seq.getStart() <= res && seq.getEnd() >= res) {
        searchResults.addResult(seq, res, res);
        return true;
      }
    } catch (NumberFormatException ignored) {
      // search string is not a number, so is not a residue position
    }
    return false;
  }

  /**
   * Sets whether the search is case sensitive (default is no)
   *
   * @param value
   */
  public void setCaseSensitive(boolean value) {
    this.caseSensitive = value;
  }

  /**
   * Sets whether search returns all matches. Default is to return the next
   * match only.
   *
   * @param value
   */
  public void setFindAll(boolean value) {
    this.findAll = value;
  }

  /**
   * Returns the (possibly empty) list of matching sequences (when search
   * includes searching sequence names). The list is created by find(), so is
   * null if no search has been performed.
   *
   * @return the sequences matched by id or description
   */
  public Vector<SequenceI> getIdMatch() {
    return idMatch;
  }

  /**
   * @return the searchResults
   */
  public SearchResultsI getSearchResults() {
    return searchResults;
  }

  /**
   * @return the resIndex
   */
  public int getResIndex() {
    return resIndex;
  }

  /**
   * @return the seqIndex
   */
  public int getSeqIndex() {
    return seqIndex;
  }

  /**
   * Sets whether search also searches in sequence description text (default
   * is no)
   *
   * @param value
   */
  public void setIncludeDescription(boolean value) {
    this.includeDescription = value;
  }
}