X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FFinder.java;h=3cbef6d89175201f0f9029cf3dfca2260bf0295c;hb=9b5106c49c8fa0a7269f38ad1825851eb8ba5b85;hp=d7bf0a270a78cfc627362595b165ddac2380b901;hpb=dfc18d64376ae95cfdbf3c112825587a8891179c;p=jalview.git diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index d7bf0a2..3cbef6d 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -20,14 +20,19 @@ */ package jalview.analysis; +import jalview.api.AlignViewportI; +import jalview.api.FinderI; import jalview.datamodel.AlignmentI; +import jalview.datamodel.Range; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResultsI; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; +import jalview.datamodel.VisibleContigsIterator; import jalview.util.Comparison; +import java.util.List; import java.util.Vector; import com.stevesoft.pat.Regex; @@ -35,216 +40,314 @@ import com.stevesoft.pat.Regex; /** * Implements the search algorithm for the Find dialog */ -public class Finder +public class Finder implements FinderI { /* - * match residue locations + * matched residue locations */ private SearchResultsI searchResults; /* * sequences matched by id or description */ - private Vector idMatch; + private Vector idMatches; /* - * the alignment to search over + * the viewport to search over */ - private AlignmentI alignment; - - /* - * (optional) selection to restrict search to - */ - private SequenceGroup selection; - - /* - * set true for case-sensitive search (default is false) - */ - private boolean caseSensitive; - - /* - * set true to search sequence description (default is false) - */ - private boolean includeDescription; - - /* - * set true to return all matches (default is next match only) - */ - private boolean findAll; + private AlignViewportI viewport; /* * sequence index in alignment to search from */ - private int seqIndex; + private int sequenceIndex; /* - * residue position in sequence to search from, base 1 - * (position of last match for a repeat search) + * column position in sequence to search from, base 0 + * - absolute column number including any hidden columns + * (position after start of last match for a repeat search) */ - private int resIndex; + private int columnIndex; /** - * Constructor to start searching an alignment, optionally restricting results - * to a selection + * Constructor for searching a viewport * - * @param al - * @param sel + * @param av */ - public Finder(AlignmentI al, SequenceGroup sel) + public Finder(AlignViewportI av) { - this(al, sel, 0, -1); + this.viewport = av; + this.sequenceIndex = 0; + this.columnIndex = -1; } - /** - * Constructor to resume search at given sequence and residue on alignment and - * (optionally) restricted to a selection - * - * @param al - * @param sel - * @param seqindex - * @param resindex - */ - public Finder(AlignmentI al, SequenceGroup sel, int seqindex, - int resindex) + @Override + public void findAll(String theSearchString, boolean matchCase, + boolean searchDescription) + { + /* + * search from the start + */ + sequenceIndex = 0; + columnIndex = -1; + + doFind(theSearchString, matchCase, searchDescription, true); + + /* + * reset to start for next search + */ + sequenceIndex = 0; + columnIndex = -1; + } + + @Override + public void findNext(String theSearchString, boolean matchCase, + boolean searchDescription) { - this.alignment = al; - this.selection = sel; - this.seqIndex = seqindex; - this.resIndex = resindex; + doFind(theSearchString, matchCase, searchDescription, false); + + if (searchResults.isEmpty() && idMatches.isEmpty()) + { + /* + * search failed - reset to start for next search + */ + sequenceIndex = 0; + columnIndex = -1; + } } /** - * Performs a find for the given search string. By default the next match is - * found, but if setFindAll(true) has been called, then all matches are found. - * Sequences matched by id or description can be retrieved by getIdMatch(), - * and matched residue patterns by getSearchResults(). + * Performs a 'find next' or 'find all' * * @param theSearchString - * @return + * @param matchCase + * @param searchDescription + * @param findAll */ - public void find(String theSearchString) + protected void doFind(String theSearchString, boolean matchCase, + boolean searchDescription, boolean findAll) { - String searchString = caseSensitive ? theSearchString.toUpperCase() - : theSearchString; - Regex regex = new Regex(searchString); - regex.setIgnoreCase(!caseSensitive); + String searchString = matchCase ? theSearchString + : theSearchString.toUpperCase(); + Regex searchPattern = new Regex(searchString); + searchPattern.setIgnoreCase(!matchCase); + searchResults = new SearchResults(); - idMatch = new Vector<>(); + idMatches = new Vector<>(); + SequenceGroup selection = viewport.getSelectionGroup(); if (selection != null && selection.getSize() < 1) { selection = null; // ? ignore column-only selection } - boolean finished = false; + AlignmentI alignment = viewport.getAlignment(); int end = alignment.getHeight(); - while (!finished && (seqIndex < end)) + while (sequenceIndex < end) { - SequenceI seq = alignment.getSequenceAt(seqIndex); - - if ((selection != null) && !selection.contains(seq)) + SequenceI seq = alignment.getSequenceAt(sequenceIndex); + boolean found = findNextMatch(seq, searchString, searchPattern, + searchDescription); + if (found && !findAll) { - // this sequence is not in the selection - skip to next sequence - seqIndex++; - resIndex = -1; - continue; + return; } - - if (resIndex < 0) + if (!found) { - /* - * at start of sequence; try find by residue number, in sequence id, - * or (optionally) in sequence description - */ - resIndex = 0; - if (doNonMotifSearches(seq, searchString, regex)) - { - return; - } + sequenceIndex++; + columnIndex = -1; } + } + } + + /** + * Answers the start-end column range of the visible region of + * sequence starting at or after the given column. + * If there are no hidden columns, this just returns the remaining width of + * the sequence. The range is restricted to the current selection + * if there is one. Answers null if there are no visible columns at or after + * column. + */ + protected Range getNextVisibleSequenceRegion(SequenceI sequence, + int column) + { + int seqColStart = column; + int seqColEnd = sequence.getLength() - 1; - finished = searchSequenceString(seq, regex) && !findAll; + /* + * restrict search to (next) visible column region, + * in case there are hidden columns + */ + AlignmentI alignment = viewport.getAlignment(); + VisibleContigsIterator visibleRegions = alignment.getHiddenColumns() + .getVisContigsIterator(column, alignment.getWidth(), + false); + int[] visible = visibleRegions.hasNext() ? visibleRegions.next() : null; + if (visible == null) + { + columnIndex = seqColEnd + 1; + return null; + } + seqColStart = Math.max(seqColStart, visible[0]); + seqColEnd = Math.min(seqColEnd, visible[1]); - if (!finished) + /* + * restrict search to selected region if there is one + */ + SequenceGroup selection = viewport.getSelectionGroup(); + if (selection != null) + { + int selectionStart = selection.getStartRes(); + int selectionEnd = selection.getEndRes(); + if (selectionStart > seqColEnd || selectionEnd < seqColStart) { - seqIndex++; - resIndex = -1; + /* + * sequence region doesn't overlap selection region + */ + columnIndex = seqColEnd + 1; + return null; } + seqColStart = Math.max(seqColStart, selectionStart); + seqColEnd = Math.min(seqColEnd, selectionEnd); } + + return new Range(seqColStart, seqColEnd); } /** - * Searches the sequence, starting from resIndex (base 1), and - * adds matches to searchResults. The search is restricted to the - * selection region if there is one. Answers true if any match is - * added, else false. + * Finds the next match in the given sequence, starting at column at + * columnIndex. Answers true if a match is found, else false. If + * a match is found, columnIndex is advanced to the column after + * the start of the matched region, ready for a search from the next position. * * @param seq - * @param regex + * @param searchString + * @param searchPattern + * @param matchDescription * @return */ - protected boolean searchSequenceString(SequenceI seq, Regex regex) + protected boolean findNextMatch(SequenceI seq, String searchString, + Regex searchPattern, boolean matchDescription) { + SequenceGroup selection = viewport.getSelectionGroup(); + if (selection != null && !selection.contains(seq)) + { + /* + * this sequence is not in the selection - advance to next sequence + */ + return false; + } + + if (columnIndex < 0) + { + /* + * at start of sequence; try find by residue number, in sequence id, + * or (optionally) in sequence description + */ + if (doNonMotifSearches(seq, searchString, searchPattern, + matchDescription)) + { + return true; + } + } + /* - * Restrict search to selected region if there is one + * search for next match in sequence string */ - int seqColStart = 0; - int seqColEnd = seq.getLength() - 1; - int residueOffset = 0; - if (selection != null) + int end = seq.getLength(); + while (columnIndex < end) { - int selColEnd = selection.getEndRes(); - int selColStart = selection.getStartRes(); - if (selColStart > seqColEnd) + if (searchNextVisibleRegion(seq, searchPattern)) { - return false; // sequence doesn't reach selection region + return true; } - seqColStart = selColStart; - seqColEnd = Math.min(seqColEnd, selColEnd); - residueOffset = seq.findPosition(selection.getStartRes()) - - seq.getStart(); } - String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1); + return false; + } + /** + * Searches the sequence, starting from columnIndex, and adds the + * next match (if any) to searchResults. The search is restricted + * to the next visible column region, and to the selection region + * if there is one. Answers true if a match is added, else false. + * + * @param seq + * @param searchPattern + * @return + */ + protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern) + { + Range visible = getNextVisibleSequenceRegion(seq, columnIndex); + if (visible == null) + { + return false; + } + String seqString = seq.getSequenceAsString(visible.start, visible.end + 1); String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString); - SearchResultMatchI lastMatch = null; - boolean found = false; - - for (int r = resIndex; r < noGaps.length(); r++) + if (searchPattern.search(noGaps)) + { + int sequenceStartPosition = seq.findPosition(visible.start); + recordMatch(seq, searchPattern, sequenceStartPosition); + return true; + } + else { /* - * searchFrom position is base 0, r is base 1, - * so search is from the position after the r'th residue + * no match - advance columnIndex past this visible region + * so the next visible region (if any) is searched next */ - if (regex.searchFrom(noGaps, r)) - { - resIndex = regex.matchedFrom(); - resIndex += residueOffset; // add back #residues before selection region - int matchStartPosition = resIndex + seq.getStart(); - int matchEndPosition = matchStartPosition + regex.charsMatched() - - 1; - if (lastMatch == null || !lastMatch.contains(seq, - matchStartPosition, matchEndPosition)) - { - lastMatch = searchResults.addResult(seq, matchStartPosition, - matchEndPosition); - found = true; - } - if (!findAll) - { - resIndex++; - return true; - } - r = resIndex; - } - else - { - break; - } + columnIndex = visible.end + 1; + } + + return false; + } + + /** + * Adds the match held in the searchPattern Regex to the + * searchResults, unless it is a subregion of the last match + * recorded. columnIndex is advanced to the position after the + * start of the matched region, ready for the next search. Answers true if a + * match was added, else false. + * + * @param seq + * @param searchPattern + * @param firstResiduePosition + * @return + */ + protected boolean recordMatch(SequenceI seq, Regex searchPattern, + int firstResiduePosition) + { + /* + * get start/end of the match in sequence coordinates + */ + int offset = searchPattern.matchedFrom(); + int matchStartPosition = firstResiduePosition + offset; + int matchEndPosition = matchStartPosition + + searchPattern.charsMatched() - 1; + + /* + * update columnIndex to next column after the start of the match + * (findIndex returns a value base 1, columnIndex is held base 0) + */ + columnIndex = seq.findIndex(matchStartPosition); + + /* + * check that this match is not a subset of the previous one (JAL-2302) + */ + List matches = searchResults.getResults(); + SearchResultMatchI lastMatch = matches.isEmpty() ? null + : matches.get(matches.size() - 1); + + if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition, + matchEndPosition)) + { + searchResults.addResult(seq, matchStartPosition, matchEndPosition); + return true; } - return found; + + return false; } /** @@ -254,26 +357,31 @@ public class Finder *
  • match search string to sequence id
  • *
  • match search string to sequence description (optional)
  • * - * Answers true if a match is found and we are not doing 'find all' (so this - * search action is complete), else false. + * Answers true if a match is found, else false. * * @param seq * @param searchString - * @param regex + * @param searchPattern + * @param includeDescription * @return */ protected boolean doNonMotifSearches(SequenceI seq, String searchString, - Regex regex) + Regex searchPattern, boolean includeDescription) { - if (searchForResidueNumber(seq, searchString) && !findAll) + /* + * position sequence search to start of sequence + */ + columnIndex = 0; + + if (searchForResidueNumber(seq, searchString)) { return true; } - if (searchSequenceName(seq, regex) && !findAll) + if (searchSequenceName(seq, searchPattern)) { return true; } - if (searchSequenceDescription(seq, regex) && !findAll) + if (includeDescription && searchSequenceDescription(seq, searchPattern)) { return true; } @@ -281,24 +389,20 @@ public class Finder } /** - * Searches for a match with the sequence description, if that option was - * requested, and if found, adds the sequence to the list of match ids (but - * not as a duplicate). Answers true if a match was added, else false. + * Searches for a match with the sequence description, and if found, adds the + * sequence to the list of match ids (but not as a duplicate). Answers true if + * a match was added, else false. * * @param seq - * @param regex + * @param searchPattern * @return */ - protected boolean searchSequenceDescription(SequenceI seq, Regex regex) + protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern) { - if (!includeDescription) - { - return false; - } String desc = seq.getDescription(); - if (desc != null && regex.search(desc) && !idMatch.contains(seq)) + if (desc != null && searchPattern.search(desc) && !idMatches.contains(seq)) { - idMatch.addElement(seq); + idMatches.addElement(seq); return true; } return false; @@ -310,14 +414,14 @@ public class Finder * a match was added, else false. * * @param seq - * @param regex + * @param searchPattern * @return */ - protected boolean searchSequenceName(SequenceI seq, Regex regex) + protected boolean searchSequenceName(SequenceI seq, Regex searchPattern) { - if (regex.search(seq.getName()) && !idMatch.contains(seq)) + if (searchPattern.search(seq.getName()) && !idMatches.contains(seq)) { - idMatch.addElement(seq); + idMatches.addElement(seq); return true; } return false; @@ -325,7 +429,8 @@ public class Finder /** * Tries to interpret the search string as a residue position, and if valid, - * adds the position to the search results + * adds the position to the search results and returns true, else answers + * false */ protected boolean searchForResidueNumber(SequenceI seq, String searchString) { @@ -343,70 +448,21 @@ public class Finder return false; } - /** - * Sets whether the search is case sensitive (default is no) - * - * @param value + /* (non-Javadoc) + * @see jalview.analysis.FinderI#getIdMatch() */ - public void setCaseSensitive(boolean value) + @Override + public Vector getIdMatches() { - this.caseSensitive = value; + return idMatches; } - /** - * Sets whether search returns all matches. Default is to return the next - * match only. - * - * @param value - */ - public void setFindAll(boolean value) - { - this.findAll = value; - } - - /** - * Returns the (possibly empty) list of matching sequences (when search - * includes searching sequence names) - * - * @return - */ - public Vector getIdMatch() - { - return idMatch; - } - - /** - * @return the searchResults + /* (non-Javadoc) + * @see jalview.analysis.FinderI#getSearchResults() */ + @Override public SearchResultsI getSearchResults() { return searchResults; } - - /** - * @return the resIndex - */ - public int getResIndex() - { - return resIndex; - } - - /** - * @return the seqIndex - */ - public int getSeqIndex() - { - return seqIndex; - } - - /** - * Sets whether search also searches in sequence description text (default is - * no) - * - * @param value - */ - public void setIncludeDescription(boolean value) - { - this.includeDescription = value; - } }