X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FFinder.java;h=fc6537910140ac4c8bfb378f34c1c1c283fc1589;hb=b7014049e421026d0f2d57179af9688472cd78f2;hp=96151d743f0c4782a7d864581f0ec934de5c36df;hpb=47168f025aefdaa044802bd5f8f510ffe43a4808;p=jalview.git diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index 96151d7..fc65379 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -20,342 +20,615 @@ */ package jalview.analysis; -import java.util.*; - -import jalview.datamodel.*; - -public class Finder +import jalview.api.AlignViewportI; +import jalview.api.FinderI; +import jalview.datamodel.AlignmentI; +import jalview.datamodel.SearchResultMatchI; +import jalview.datamodel.SearchResults; +import jalview.datamodel.SearchResultsI; +import jalview.datamodel.SequenceGroup; +import jalview.datamodel.SequenceI; +import jalview.util.Comparison; +import jalview.util.MapList; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.stevesoft.pat.Regex; + +/** + * Implements the search algorithm for the Find dialog + */ +public class Finder implements FinderI { - /** - * Implements the search algorithms for the Find dialog box. + /* + * matched residue locations */ - SearchResults searchResults; + private SearchResultsI searchResults; - AlignmentI alignment; + /* + * sequences matched by id or description + */ + private List idMatches; - jalview.datamodel.SequenceGroup selection = null; + /* + * the viewport to search over + */ + private AlignViewportI viewport; - Vector idMatch = null; + /* + * sequence index in alignment to search from + */ + private int sequenceIndex; - boolean caseSensitive = false; + /* + * position offset in sequence to search from, base 0 + * (position after start of last match for a 'find next') + */ + private int residueIndex; - boolean findAll = false; + /* + * the true sequence position of the start of the + * last sequence searched (when 'ignore hidden regions' does not apply) + */ + private int searchedSequenceStartPosition; + + /* + * when 'ignore hidden regions' applies, this holds the mapping from + * the visible sequence positions (1, 2, ...) to true sequence positions + */ + private MapList searchedSequenceMap; - com.stevesoft.pat.Regex regex = null; + private String seqToSearch; /** - * hold's last-searched position between calles to find(false) + * Constructor for searching a viewport + * + * @param av */ - int seqIndex = 0, resIndex = -1; + public Finder(AlignViewportI av) + { + this.viewport = av; + this.sequenceIndex = 0; + this.residueIndex = -1; + } + + @Override + public void findAll(String theSearchString, boolean matchCase, + boolean searchDescription, boolean ignoreHidden) + { + /* + * search from the start + */ + sequenceIndex = 0; + residueIndex = -1; + + doFind(theSearchString, matchCase, searchDescription, true, + ignoreHidden); + + /* + * reset to start for next search + */ + sequenceIndex = 0; + residueIndex = -1; + } - public Finder(AlignmentI alignment, SequenceGroup selection) + @Override + public void findNext(String theSearchString, boolean matchCase, + boolean searchDescription, boolean ignoreHidden) { - this.alignment = alignment; - this.selection = selection; + doFind(theSearchString, matchCase, searchDescription, false, + ignoreHidden); + + if (searchResults.isEmpty() && idMatches.isEmpty()) + { + /* + * search failed - reset to start for next search + */ + sequenceIndex = 0; + residueIndex = -1; + } } /** - * restart search at given sequence and residue on alignment and (optionally) - * contained in selection + * Performs a 'find next' or 'find all' * - * @param alignment - * @param selectionGroup - * @param seqIndex - * @param resIndex + * @param theSearchString + * @param matchCase + * @param searchDescription + * @param findAll + * @param ignoreHidden */ - public Finder(AlignmentI alignment, SequenceGroup selectionGroup, - int seqIndex, int resIndex) + protected void doFind(String theSearchString, boolean matchCase, + boolean searchDescription, boolean findAll, boolean ignoreHidden) { - this(alignment, selectionGroup); - this.seqIndex = seqIndex; - this.resIndex = resIndex; - } + searchResults = new SearchResults(); + idMatches = new ArrayList<>(); - public boolean find(String searchString) - { - boolean hasResults = false; - if (!caseSensitive) + String searchString = matchCase ? theSearchString + : theSearchString.toUpperCase(); + Regex searchPattern = new Regex(searchString); + searchPattern.setIgnoreCase(!matchCase); + + SequenceGroup selection = viewport.getSelectionGroup(); + if (selection != null && selection.getSize() < 1) { - searchString = searchString.toUpperCase(); + selection = null; // ? ignore column-only selection } - regex = new com.stevesoft.pat.Regex(searchString); - regex.setIgnoreCase(!caseSensitive); - searchResults = new SearchResults(); - idMatch = new Vector(); - Sequence seq; - String item = null; - boolean found = false; + + AlignmentI alignment = viewport.getAlignment(); int end = alignment.getHeight(); - // ///////////////////////////////////////////// + getSequence(ignoreHidden); - if (selection != null) + boolean found = false; + while (!found || findAll) { - if ((selection.getSize() < 1) - || ((selection.getEndRes() - selection.getStartRes()) < 2)) + found = findNextMatch(searchString, searchPattern, searchDescription, + ignoreHidden); + if (sequenceIndex >= end) { - selection = null; + break; } } + } - while (!found && (seqIndex < end)) + /** + * Calculates and saves the sequence string to search. The string is restricted + * to the current selection region if there is one, and is saved with all gaps + * removed. + *

+ * If there are hidden columns, and option {@ignoreHidden} is selected, then + * only visible positions of the sequence are included, and a mapping is also + * constructed from the returned string positions to the true sequence + * positions. + *

+ * Note we have to do this each time {@code findNext} or {@code findAll} is + * called, in case the alignment, selection group or hidden columns have + * changed. In particular, if the sequence at offset {@code sequenceIndex} in + * the alignment is (no longer) in the selection group, search is advanced to + * the next sequence that is. + *

+ * Sets sequence string to the empty string if there are no more sequences (in + * selection group if any) at or after {@code sequenceIndex}. + *

+ * Returns true if a sequence could be found, false if end of alignment was + * reached + * + * @param ignoreHidden + * @return + */ + private boolean getSequence(boolean ignoreHidden) + { + AlignmentI alignment = viewport.getAlignment(); + if (sequenceIndex >= alignment.getHeight()) { - seq = (Sequence) alignment.getSequenceAt(seqIndex); - - if ((selection != null && selection.getSize() > 0) - && !selection.getSequences(null).contains(seq)) - { - seqIndex++; - resIndex = -1; - - continue; - } - if (resIndex < 0) + seqToSearch = ""; + return false; + } + SequenceI seq = alignment.getSequenceAt(sequenceIndex); + SequenceGroup selection = viewport.getSelectionGroup(); + if (selection != null && !selection.contains(seq)) + { + if (!nextSequence(ignoreHidden)) { - resIndex = 0; - // test for one off matches - sequence position and sequence ID - // //// is the searchString a residue number? - try - { - int res = Integer.parseInt(searchString); - // possibly a residue number - check if valid for seq - if (seq.getEnd() >= res) - { - searchResults.addResult(seq, res, res); - hasResults = true; - // resIndex=seq.getLength(); - // seqIndex++; - if (!findAll) - { - found = true; - break; - } - } - } catch (NumberFormatException ex) - { - } - - if (regex.search(seq.getName())) - { - idMatch.addElement(seq); - hasResults = true; - if (!findAll) - { - // stop and return the match - found = true; - break; - } - } + return false; } - item = seq.getSequenceAsString(); + seq = alignment.getSequenceAt(sequenceIndex); + } - if ((selection != null) - && (selection.getEndRes() < alignment.getWidth() - 1)) + String seqString = null; + if (ignoreHidden) + { + seqString = getVisibleSequence(seq); + } + else + { + int startCol = 0; + int endCol = seq.getLength() - 1; + this.searchedSequenceStartPosition = seq.getStart(); + if (selection != null) { - item = item.substring(0, selection.getEndRes() + 1); + startCol = selection.getStartRes(); + endCol = Math.min(endCol, selection.getEndRes()); + this.searchedSequenceStartPosition = seq.findPosition(startCol); } + seqString = seq.getSequenceAsString(startCol, endCol + 1); + } - // /Shall we ignore gaps???? - JBPNote: Add Flag for forcing this or not - StringBuffer noGapsSB = new StringBuffer(); - int insertCount = 0; - Vector spaces = new Vector(); + /* + * remove gaps; note that even if this leaves an empty string, we 'search' + * the sequence anyway (for possible match on name or description) + */ + String ungapped = AlignSeq.extractGaps(Comparison.GapChars, seqString); + this.seqToSearch = ungapped; - for (int j = 0; j < item.length(); j++) - { - if (!jalview.util.Comparison.isGap(item.charAt(j))) - { - noGapsSB.append(item.charAt(j)); - spaces.addElement(new Integer(insertCount)); - } - else - { - insertCount++; - } - } + return true; + } - String noGaps = noGapsSB.toString(); + /** + * Returns a string consisting of only the visible residues of {@code seq} from + * alignment column {@ fromColumn}, restricted to the current selection region + * if there is one. + *

+ * As a side-effect, also computes the mapping from the true sequence positions + * to the positions (1, 2, ...) of the returned sequence. This is to allow + * search matches in the visible sequence to be converted to sequence positions. + * + * @param seq + * @return + */ + private String getVisibleSequence(SequenceI seq) + { + int seqStartCol = seq.findIndex(seq.getStart()); + int seqEndCol = seq.findIndex(seq.getStart() + seq.getLength() - 1); + Iterator visibleColumns = viewport.getViewAsVisibleContigs(true); + StringBuilder visibleSeq = new StringBuilder(seqEndCol - seqStartCol); + List fromRanges = new ArrayList<>(); - for (int r = resIndex; r < noGaps.length(); r++) + while (visibleColumns.hasNext()) + { + int[] range = visibleColumns.next(); + if (range[0] > seqEndCol) { - - if (regex.searchFrom(noGaps, r)) - { - resIndex = regex.matchedFrom(); - - if ((selection != null && selection.getSize() > 0) - && ((resIndex + Integer.parseInt(spaces.elementAt( - resIndex).toString())) < selection.getStartRes())) - { - continue; - } - - int sres = seq - .findPosition(resIndex - + Integer.parseInt(spaces.elementAt(resIndex) - .toString())); - int eres = seq.findPosition(regex.matchedTo() - - 1 - + Integer.parseInt(spaces - .elementAt(regex.matchedTo() - 1).toString())); - - searchResults.addResult(seq, sres, eres); - hasResults = true; - if (!findAll) - { - // thats enough, break and display the result - found = true; - resIndex++; - - break; - } - - r = resIndex; - } - else - { - break; - } + // beyond the end of the sequence + break; } - - if (!found) + if (range[1] < seqStartCol) { - seqIndex++; - resIndex = -1; + // before the start of the sequence + continue; + } + String subseq = seq.getSequenceAsString(range[0], range[1] + 1); + String ungapped = AlignSeq.extractGaps(Comparison.GapChars, subseq); + visibleSeq.append(ungapped); + if (!ungapped.isEmpty()) + { + /* + * visible region includes at least one non-gap character, + * so add the range to the mapping being constructed + */ + int seqResFrom = seq.findPosition(range[0]); + int seqResTo = seq.findPosition(range[1]); + fromRanges.add(new int[] { seqResFrom, seqResTo }); } } - /** - * We now search the Id string in the main search loop. for (int id = 0; id - * < alignment.getHeight(); id++) { if - * (regex.search(alignment.getSequenceAt(id).getName())) { - * idMatch.addElement(alignment.getSequenceAt(id)); hasResults = true; } } + /* + * construct the mapping + * from: visible sequence positions 1..length + * to: true residue positions of the alignment sequence */ - return hasResults; - } + List toRange = Arrays + .asList(new int[] + { 1, visibleSeq.length() }); + searchedSequenceMap = new MapList(fromRanges, toRange, 1, 1); - /** - * @return the alignment - */ - public AlignmentI getAlignment() - { - return alignment; + return visibleSeq.toString(); } /** - * @param alignment - * the alignment to set + * Advances the search to the next sequence in the alignment. Sequences not in + * the current selection group (if there is one) are skipped. The (sub-)sequence + * to be searched is extracted, gaps removed, and saved, or set to null if there + * are no more sequences to search. + *

+ * Returns true if a sequence could be found, false if end of alignment was + * reached + * + * @param ignoreHidden */ - public void setAlignment(AlignmentI alignment) + private boolean nextSequence(boolean ignoreHidden) { - this.alignment = alignment; - } + sequenceIndex++; + residueIndex = -1; - /** - * @return the caseSensitive - */ - public boolean isCaseSensitive() - { - return caseSensitive; + return getSequence(ignoreHidden); } /** - * @param caseSensitive - * the caseSensitive to set + * Finds the next match in the given sequence, starting at offset + * {@code residueIndex}. Answers true if a match is found, else false. + *

+ * If a match is found, {@code residueIndex} is advanced to the position after + * the start of the matched region, ready for the next search. + *

+ * If no match is found, {@code sequenceIndex} is advanced ready to search the + * next sequence. + * + * @param seqToSearch + * @param searchString + * @param searchPattern + * @param matchDescription + * @param ignoreHidden + * @return */ - public void setCaseSensitive(boolean caseSensitive) + protected boolean findNextMatch(String searchString, + Regex searchPattern, boolean matchDescription, + boolean ignoreHidden) { - this.caseSensitive = caseSensitive; - } + if (residueIndex < 0) + { + /* + * at start of sequence; try find by residue number, in sequence id, + * or (optionally) in sequence description + */ + if (doNonMotifSearches(searchString, searchPattern, + matchDescription)) + { + return true; + } + } - /** - * @return the findAll - */ - public boolean isFindAll() - { - return findAll; + /* + * search for next match in sequence string + */ + int end = seqToSearch.length(); + while (residueIndex < end) + { + boolean matched = searchPattern.searchFrom(seqToSearch, residueIndex); + if (matched) + { + if (recordMatch(searchPattern, ignoreHidden)) + { + return true; + } + } + else + { + residueIndex = Integer.MAX_VALUE; + } + } + + nextSequence(ignoreHidden); + return false; } /** - * @param findAll - * the findAll to set + * Adds the match held in the searchPattern Regex to the + * searchResults, unless it is a subregion of the last match + * recorded. residueIndex is advanced to the position after the + * start of the matched region, ready for the next search. Answers true if a + * match was added, else false. + *

+ * Matches that lie entirely within hidden regions of the alignment are not + * added. + * + * @param searchPattern + * @param ignoreHidden + * @return */ - public void setFindAll(boolean findAll) + protected boolean recordMatch(Regex searchPattern, boolean ignoreHidden) { - this.findAll = findAll; + SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex); + + /* + * convert start/end of the match to sequence coordinates + */ + int offset = searchPattern.matchedFrom(); + int matchStartPosition = this.searchedSequenceStartPosition + offset; + int matchEndPosition = matchStartPosition + + searchPattern.charsMatched() - 1; + + /* + * update residueIndex to next position after the start of the match + * (findIndex returns a value base 1, columnIndex is held base 0) + */ + residueIndex += offset + 1; + + /* + * return false if the match is entirely in a hidden region + */ + if (allHidden(seq, matchStartPosition, matchEndPosition)) + { + return false; + } + + /* + * check that this match is not a subset of the previous one (JAL-2302) + */ + List matches = searchResults.getResults(); + SearchResultMatchI lastMatch = matches.isEmpty() ? null + : matches.get(matches.size() - 1); + + if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition, + matchEndPosition)) + { + addMatch(seq, matchStartPosition, matchEndPosition, ignoreHidden); + return true; + } + + return false; } /** - * @return the selection + * Adds one match to the stored list. If hidden residues are being skipped, then + * the match may need to be split into contiguous positions of the sequence (so + * it does not include skipped residues). + * + * @param seq + * @param matchStartPosition + * @param matchEndPosition + * @param ignoreHidden */ - public jalview.datamodel.SequenceGroup getSelection() + private void addMatch(SequenceI seq, int matchStartPosition, + int matchEndPosition, boolean ignoreHidden) { - return selection; + if (!ignoreHidden) + { + /* + * simple case + */ + searchResults.addResult(seq, matchStartPosition, matchEndPosition); + return; + } + + /* + * get start-end contiguous ranges in underlying sequence + */ + int[] truePositions = searchedSequenceMap + .locateInFrom(matchStartPosition, matchEndPosition); + for (int i = 0; i < truePositions.length - 1; i += 2) + { + searchResults.addResult(seq, truePositions[i], truePositions[i + 1]); + } } /** - * @param selection - * the selection to set + * Returns true if all residues are hidden, else false + * + * @param seq + * @param fromPos + * @param toPos + * @return */ - public void setSelection(jalview.datamodel.SequenceGroup selection) + private boolean allHidden(SequenceI seq, int fromPos, int toPos) { - this.selection = selection; + if (!viewport.hasHiddenColumns()) + { + return false; + } + for (int res = fromPos; res <= toPos; res++) + { + if (isVisible(seq, res)) + { + return false; + } + } + return true; } /** - * @return the idMatch + * Does searches other than for residue patterns. Currently this includes + *

+ * Answers true if a match is found, else false. + * + * @param searchString + * @param searchPattern + * @param includeDescription + * @return */ - public Vector getIdMatch() + protected boolean doNonMotifSearches(String searchString, + Regex searchPattern, boolean includeDescription) { - return idMatch; + SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex); + + /* + * position sequence search to start of sequence + */ + residueIndex = 0; + try + { + int res = Integer.parseInt(searchString); + return searchForResidueNumber(seq, res); + } catch (NumberFormatException ex) + { + // search pattern is not a number + } + + if (searchSequenceName(seq, searchPattern)) + { + return true; + } + if (includeDescription && searchSequenceDescription(seq, searchPattern)) + { + return true; + } + return false; } /** - * @return the regex + * Searches for a match with the sequence description, and if found, adds the + * sequence to the list of match ids (but not as a duplicate). Answers true if + * a match was added, else false. + * + * @param seq + * @param searchPattern + * @return */ - public com.stevesoft.pat.Regex getRegex() + protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern) { - return regex; + String desc = seq.getDescription(); + if (desc != null && searchPattern.search(desc) && !idMatches.contains(seq)) + { + idMatches.add(seq); + return true; + } + return false; } /** - * @return the searchResults + * Searches for a match with the sequence name, and if found, adds the + * sequence to the list of match ids (but not as a duplicate). Answers true if + * a match was added, else false. + * + * @param seq + * @param searchPattern + * @return */ - public SearchResults getSearchResults() + protected boolean searchSequenceName(SequenceI seq, Regex searchPattern) { - return searchResults; + if (searchPattern.search(seq.getName()) && !idMatches.contains(seq)) + { + idMatches.add(seq); + return true; + } + return false; } /** - * @return the resIndex + * If the residue position is valid for the sequence, and in a visible column, + * adds the position to the search results and returns true, else answers false. + * + * @param seq + * @param resNo + * @return */ - public int getResIndex() + protected boolean searchForResidueNumber(SequenceI seq, int resNo) { - return resIndex; + if (seq.getStart() <= resNo && seq.getEnd() >= resNo) + { + if (isVisible(seq, resNo)) + { + searchResults.addResult(seq, resNo, resNo); + return true; + } + } + return false; } /** - * @param resIndex - * the resIndex to set + * Returns true if the residue is in a visible column, else false + * + * @param seq + * @param res + * @return */ - public void setResIndex(int resIndex) + private boolean isVisible(SequenceI seq, int res) { - this.resIndex = resIndex; + if (!viewport.hasHiddenColumns()) + { + return true; + } + int col = seq.findIndex(res); // base 1 + return viewport.getAlignment().getHiddenColumns().isVisible(col - 1); // base 0 } - /** - * @return the seqIndex - */ - public int getSeqIndex() + @Override + public List getIdMatches() { - return seqIndex; + return idMatches; } - /** - * @param seqIndex - * the seqIndex to set - */ - public void setSeqIndex(int seqIndex) + @Override + public SearchResultsI getSearchResults() { - this.seqIndex = seqIndex; + return searchResults; } }