X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FFinder.java;h=181cc9f0639ea6d475b10d46bfa0b93fe37ee24b;hb=13a74c42398c4a2d3794242efec6e52b949b5e5d;hp=0996830bf806ef4fcb7aadc31ce04a81138f8298;hpb=24de2d6a6f3db1b9d55f367e2bf8ba112e202a8a;p=jalview.git diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index 0996830..181cc9f 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -20,21 +20,26 @@ */ package jalview.analysis; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Locale; + +import com.stevesoft.pat.Regex; + +import jalview.api.AlignViewportI; import jalview.api.FinderI; import jalview.datamodel.AlignmentI; -import jalview.datamodel.Range; import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResultsI; +import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; -import jalview.datamodel.VisibleContigsIterator; +import jalview.datamodel.features.SequenceFeaturesI; import jalview.util.Comparison; - -import java.util.List; -import java.util.Vector; - -import com.stevesoft.pat.Regex; +import jalview.util.MapList; /** * Implements the search algorithm for the Find dialog @@ -49,206 +54,316 @@ public class Finder implements FinderI /* * sequences matched by id or description */ - private Vector idMatches; + private List idMatches; /* - * the alignment to search over + * the viewport to search over */ - private AlignmentI alignment; + private AlignViewportI viewport; /* - * (optional) selection to restrict search to + * sequence index in alignment to search from */ - private SequenceGroup selection; + private int sequenceIndex; /* - * sequence index in alignment to search from + * position offset in sequence to search from, base 0 + * (position after start of last match 
for a 'find next') */ - private int sequenceIndex; + private int residueIndex; /* - * column position in sequence to search from, base 0 - * - absolute column number including any hidden columns - * (position after start of last match for a repeat search) + * the true sequence position of the start of the + * last sequence searched (when 'ignore hidden regions' does not apply) */ - private int columnIndex; + private int searchedSequenceStartPosition; + + /* + * when 'ignore hidden regions' applies, this holds the mapping from + * the visible sequence positions (1, 2, ...) to true sequence positions + */ + private MapList searchedSequenceMap; + + private String seqToSearch; /** - * Constructor for searching an alignment + * Constructor for searching a viewport * - * @param al + * @param av */ - public Finder(AlignmentI al) + public Finder(AlignViewportI av) { - this.alignment = al; + this.viewport = av; this.sequenceIndex = 0; - this.columnIndex = -1; + this.residueIndex = -1; } @Override - public void findAll(String theSearchString, SequenceGroup sg, - boolean matchCase, boolean searchDescription) + public void findAll(String theSearchString, boolean matchCase, + boolean searchDescription, boolean searchFeatureDesc, + boolean ignoreHidden) { /* * search from the start */ sequenceIndex = 0; - columnIndex = -1; + residueIndex = -1; - doFind(theSearchString, sg, matchCase, searchDescription, true); + doFind(theSearchString, matchCase, searchDescription, searchFeatureDesc, + true, ignoreHidden); /* * reset to start for next search */ sequenceIndex = 0; - columnIndex = -1; + residueIndex = -1; } @Override - public void findNext(String theSearchString, SequenceGroup sg, - boolean matchCase, boolean searchDescription) + public void findNext(String theSearchString, boolean matchCase, + boolean searchDescription, boolean searchFeatureDesc, + boolean ignoreHidden) { - doFind(theSearchString, sg, matchCase, searchDescription, false); - + doFind(theSearchString, matchCase, 
searchDescription, searchFeatureDesc, + false, ignoreHidden); + if (searchResults.isEmpty() && idMatches.isEmpty()) { /* * search failed - reset to start for next search */ sequenceIndex = 0; - columnIndex = -1; + residueIndex = -1; } } /** - * Performs a 'find next' or 'find all', optionally restricted to the - * specified selection region + * Performs a 'find next' or 'find all' * * @param theSearchString - * @param selectionRegion * @param matchCase * @param searchDescription * @param findAll + * @param ignoreHidden */ - protected void doFind(String theSearchString, SequenceGroup selectionRegion, - boolean matchCase, boolean searchDescription, boolean findAll) + protected void doFind(String theSearchString, boolean matchCase, + boolean searchDescription, boolean searchFeatureDesc, + boolean findAll, boolean ignoreHidden) { - this.selection = selectionRegion; + searchResults = new SearchResults(); + idMatches = new ArrayList<>(); + String searchString = matchCase ? theSearchString - : theSearchString.toUpperCase(); + : theSearchString.toUpperCase(Locale.ROOT); Regex searchPattern = new Regex(searchString); searchPattern.setIgnoreCase(!matchCase); - searchResults = new SearchResults(); - idMatches = new Vector<>(); - + SequenceGroup selection = viewport.getSelectionGroup(); if (selection != null && selection.getSize() < 1) { selection = null; // ? ignore column-only selection } + AlignmentI alignment = viewport.getAlignment(); int end = alignment.getHeight(); - while (sequenceIndex < end) + getSequence(ignoreHidden); + + boolean found = false; + while ((!found || findAll) && sequenceIndex < end) + { + found = findNextMatch(searchString, searchPattern, searchDescription, + searchFeatureDesc, ignoreHidden); + } + } + + /** + * Calculates and saves the sequence string to search. The string is + * restricted to the current selection region if there is one, and is saved + * with all gaps removed. + *

+ * If there are hidden columns, and option {@code ignoreHidden} is selected, then + * only visible positions of the sequence are included, and a mapping is also + * constructed from the returned string positions to the true sequence + * positions. + *

+ * Note we have to do this each time {@code findNext} or {@code findAll} is + * called, in case the alignment, selection group or hidden columns have + * changed. In particular, if the sequence at offset {@code sequenceIndex} in + * the alignment is (no longer) in the selection group, search is advanced to + * the next sequence that is. + *

+ * Sets sequence string to the empty string if there are no more sequences (in + * selection group if any) at or after {@code sequenceIndex}. + *

+ * Returns true if a sequence could be found, false if end of alignment was + * reached + * + * @param ignoreHidden + * @return + */ + private boolean getSequence(boolean ignoreHidden) + { + AlignmentI alignment = viewport.getAlignment(); + if (sequenceIndex >= alignment.getHeight()) + { + seqToSearch = ""; + return false; + } + SequenceI seq = alignment.getSequenceAt(sequenceIndex); + SequenceGroup selection = viewport.getSelectionGroup(); + if (selection != null && !selection.contains(seq)) { - SequenceI seq = alignment.getSequenceAt(sequenceIndex); - boolean found = findNextMatch(seq, searchString, searchPattern, - searchDescription); - if (found && !findAll) + if (!nextSequence(ignoreHidden)) { - return; + return false; } - if (!found) + seq = alignment.getSequenceAt(sequenceIndex); + } + + String seqString = null; + if (ignoreHidden) + { + seqString = getVisibleSequence(seq); + this.searchedSequenceStartPosition = 1; + } + else + { + int startCol = 0; + int endCol = seq.getLength() - 1; + this.searchedSequenceStartPosition = seq.getStart(); + if (selection != null) { - sequenceIndex++; - columnIndex = -1; + startCol = selection.getStartRes(); + endCol = Math.min(endCol, selection.getEndRes()); + this.searchedSequenceStartPosition = seq.findPosition(startCol); } + seqString = seq.getSequenceAsString(startCol, endCol + 1); } + + /* + * remove gaps; note that even if this leaves an empty string, we 'search' + * the sequence anyway (for possible match on name or description) + */ + String ungapped = AlignSeq.extractGaps(Comparison.GapChars, seqString); + this.seqToSearch = ungapped; + + return true; } /** - * Answers the start-end column range of the visible region of - * sequence starting at or after the given column. - * If there are no hidden columns, this just returns the remaining width of - * the sequence. The range is restricted to the current selection - * if there is one. Answers null if there are no visible columns at or after - * column. 
+ * Returns a string consisting of only the visible residues of {@code seq} + * (those in visible alignment columns), restricted to the current selection + * region if there is one. + *

+ * As a side-effect, also computes the mapping from the true sequence + * positions to the positions (1, 2, ...) of the returned sequence. This is to + * allow search matches in the visible sequence to be converted to sequence + * positions. + * + * @param seq + * @return */ - protected Range getNextVisibleSequenceRegion(SequenceI sequence, - int column) + private String getVisibleSequence(SequenceI seq) { - int seqColStart = column; - int seqColEnd = sequence.getLength() - 1; - /* - * restrict search to (next) visible column region, - * in case there are hidden columns + * get start / end columns of sequence and convert to base 0 + * (so as to match the visible column ranges) */ - VisibleContigsIterator visibleRegions = alignment.getHiddenColumns() - .getVisContigsIterator(column, alignment.getWidth(), - false); - int[] visible = visibleRegions.hasNext() ? visibleRegions.next() : null; - if (visible == null) - { - columnIndex = seqColEnd + 1; - return null; - } - seqColStart = Math.max(seqColStart, visible[0]); - seqColEnd = Math.min(seqColEnd, visible[1]); + int seqStartCol = seq.findIndex(seq.getStart()) - 1; + int seqEndCol = seq.findIndex(seq.getStart() + seq.getLength() - 1) - 1; + Iterator visibleColumns = viewport.getViewAsVisibleContigs(true); + StringBuilder visibleSeq = new StringBuilder(seqEndCol - seqStartCol); + List fromRanges = new ArrayList<>(); - /* - * restrict search to selected region if there is one - */ - if (selection != null) + while (visibleColumns.hasNext()) { - int selectionStart = selection.getStartRes(); - int selectionEnd = selection.getEndRes(); - if (selectionStart > seqColEnd || selectionEnd < seqColStart) + int[] range = visibleColumns.next(); + if (range[0] > seqEndCol) + { + // beyond the end of the sequence + break; + } + if (range[1] < seqStartCol) + { + // before the start of the sequence + continue; + } + String subseq = seq.getSequenceAsString(range[0], range[1] + 1); + String ungapped = 
AlignSeq.extractGaps(Comparison.GapChars, subseq); + visibleSeq.append(ungapped); + if (!ungapped.isEmpty()) { /* - * sequence region doesn't overlap selection region + * visible region includes at least one non-gap character, + * so add the range to the mapping being constructed */ - columnIndex = seqColEnd + 1; - return null; + int seqResFrom = seq.findPosition(range[0]); + int seqResTo = seqResFrom + ungapped.length() - 1; + fromRanges.add(new int[] { seqResFrom, seqResTo }); } - seqColStart = Math.max(seqColStart, selectionStart); - seqColEnd = Math.min(seqColEnd, selectionEnd); } - return new Range(seqColStart, seqColEnd); + /* + * construct the mapping + * from: visible sequence positions 1..length + * to: true residue positions of the alignment sequence + */ + List toRange = Arrays + .asList(new int[] + { 1, visibleSeq.length() }); + searchedSequenceMap = new MapList(fromRanges, toRange, 1, 1); + + return visibleSeq.toString(); } /** - * Finds the next match in the given sequence, starting at column at - * columnIndex. Answers true if a match is found, else false. If - * a match is found, columnIndex is advanced to the column after - * the start of the matched region, ready for a search from the next position. + * Advances the search to the next sequence in the alignment. Sequences not in + * the current selection group (if there is one) are skipped. The + * (sub-)sequence to be searched is extracted, gaps removed, and saved, or set + * to null if there are no more sequences to search. + *

+ * Returns true if a sequence could be found, false if end of alignment was + * reached * - * @param seq + * @param ignoreHidden + */ + private boolean nextSequence(boolean ignoreHidden) + { + sequenceIndex++; + residueIndex = -1; + + return getSequence(ignoreHidden); + } + + /** + * Finds the next match in the given sequence, starting at offset + * {@code residueIndex}. Answers true if a match is found, else false. + *

+ * If a match is found, {@code residueIndex} is advanced to the position after + * the start of the matched region, ready for the next search. + *

+ * If no match is found, {@code sequenceIndex} is advanced ready to search the + * next sequence. + * + * @param seqToSearch * @param searchString * @param searchPattern * @param matchDescription + * @param ignoreHidden * @return */ - protected boolean findNextMatch(SequenceI seq, String searchString, - Regex searchPattern, boolean matchDescription) + protected boolean findNextMatch(String searchString, Regex searchPattern, + boolean matchDescription, boolean matchFeatureDesc, + boolean ignoreHidden) { - if (selection != null && !selection.contains(seq)) - { - /* - * this sequence is not in the selection - advance to next sequence - */ - return false; - } - - if (columnIndex < 0) + if (residueIndex < 0) { /* * at start of sequence; try find by residue number, in sequence id, * or (optionally) in sequence description */ - if (doNonMotifSearches(seq, searchString, searchPattern, - matchDescription)) + if (doNonMotifSearches(searchString, searchPattern, matchDescription)) { return true; } @@ -257,83 +372,71 @@ public class Finder implements FinderI /* * search for next match in sequence string */ - int end = seq.getLength(); - while (columnIndex < end) + int end = seqToSearch.length(); + while (residueIndex < end) { - if (searchNextVisibleRegion(seq, searchPattern)) + boolean matched = searchPattern.searchFrom(seqToSearch, residueIndex); + if (matched) { - return true; + if (recordMatch(searchPattern, ignoreHidden)) + { + return true; + } + } + else + { + if (matchFeatureDesc) + { + // TODO - record last matched + matched = searchSequenceFeatures(residueIndex, searchPattern); + } + residueIndex = Integer.MAX_VALUE; } - } - return false; - } - - /** - * Searches the sequence, starting from columnIndex, and adds the - * next match (if any) to searchResults. The search is restricted - * to the next visible column region, and to the selection region - * if there is one. Answers true if a match is added, else false. 
- * - * @param seq - * @param searchPattern - * @return - */ - protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern) - { - Range visible = getNextVisibleSequenceRegion(seq, columnIndex); - if (visible == null) - { - return false; - } - String seqString = seq.getSequenceAsString(visible.start, visible.end + 1); - String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString); - - if (searchPattern.search(noGaps)) - { - int sequenceStartPosition = seq.findPosition(visible.start); - recordMatch(seq, searchPattern, sequenceStartPosition); - return true; - } - else - { - /* - * no match - advance columnIndex past this visible region - * so the next visible region (if any) is searched next - */ - columnIndex = visible.end + 1; } + nextSequence(ignoreHidden); return false; } /** * Adds the match held in the searchPattern Regex to the * searchResults, unless it is a subregion of the last match - * recorded. columnIndex is advanced to the position after the + * recorded. residueIndex is advanced to the position after the * start of the matched region, ready for the next search. Answers true if a * match was added, else false. + *

+ * Matches that lie entirely within hidden regions of the alignment are not + * added. * - * @param seq * @param searchPattern - * @param firstResiduePosition + * @param ignoreHidden * @return */ - protected boolean recordMatch(SequenceI seq, Regex searchPattern, - int firstResiduePosition) + protected boolean recordMatch(Regex searchPattern, boolean ignoreHidden) { + SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex); + /* - * get start/end of the match in sequence coordinates + * convert start/end of the match to sequence coordinates */ int offset = searchPattern.matchedFrom(); - int matchStartPosition = firstResiduePosition + offset; - int matchEndPosition = matchStartPosition - + searchPattern.charsMatched() - 1; + int matchStartPosition = this.searchedSequenceStartPosition + offset; + int matchEndPosition = matchStartPosition + searchPattern.charsMatched() + - 1; /* - * update columnIndex to next column after the start of the match + * update residueIndex to next position after the start of the match * (findIndex returns a value base 1, columnIndex is held base 0) */ - columnIndex = seq.findIndex(matchStartPosition); + residueIndex = searchPattern.matchedFrom() + 1; + + /* + * return false if the match is entirely in a hidden region + */ + if (allHidden(seq, matchStartPosition, matchEndPosition)) + { + return false; + } /* * check that this match is not a subset of the previous one (JAL-2302) @@ -345,7 +448,7 @@ public class Finder implements FinderI if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition, matchEndPosition)) { - searchResults.addResult(seq, matchStartPosition, matchEndPosition); + addMatch(seq, matchStartPosition, matchEndPosition, ignoreHidden); return true; } @@ -353,6 +456,60 @@ public class Finder implements FinderI } /** + * Adds one match to the stored list. 
If hidden residues are being skipped, + * then the match may need to be split into contiguous positions of the + * sequence (so it does not include skipped residues). + * + * @param seq + * @param matchStartPosition + * @param matchEndPosition + * @param ignoreHidden + */ + private void addMatch(SequenceI seq, int matchStartPosition, + int matchEndPosition, boolean ignoreHidden) + { + if (!ignoreHidden) + { + /* + * simple case + */ + searchResults.addResult(seq, matchStartPosition, matchEndPosition); + return; + } + + /* + * get start-end contiguous ranges in underlying sequence + */ + int[] truePositions = searchedSequenceMap + .locateInFrom(matchStartPosition, matchEndPosition); + searchResults.addResult(seq, truePositions); + } + + /** + * Returns true if all residues are hidden, else false + * + * @param seq + * @param fromPos + * @param toPos + * @return + */ + private boolean allHidden(SequenceI seq, int fromPos, int toPos) + { + if (!viewport.hasHiddenColumns()) + { + return false; + } + for (int res = fromPos; res <= toPos; res++) + { + if (isVisible(seq, res)) + { + return false; + } + } + return true; + } + + /** * Does searches other than for residue patterns. Currently this includes *

* Answers true if a match is found, else false. * - * @param seq * @param searchString * @param searchPattern * @param includeDescription * @return */ - protected boolean doNonMotifSearches(SequenceI seq, String searchString, + protected boolean doNonMotifSearches(String searchString, Regex searchPattern, boolean includeDescription) { + SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex); + /* * position sequence search to start of sequence */ - columnIndex = 0; - - if (searchForResidueNumber(seq, searchString)) + residueIndex = 0; + try { - return true; + int res = Integer.parseInt(searchString); + return searchForResidueNumber(seq, res); + } catch (NumberFormatException ex) + { + // search pattern is not a number } + if (searchSequenceName(seq, searchPattern)) { return true; @@ -391,6 +553,35 @@ public class Finder implements FinderI } /** + * Searches for a match with the sequence features, and if found, adds the + * sequence to the list of match ids, (but not as a duplicate). Answers true + * if a match was added, else false. + * + * TODO: allow incremental searching (ie next feature matched after last) + * + * @param seq + * @param searchPattern + * @return + */ + protected boolean searchSequenceFeatures(int from, Regex searchPattern) + { + boolean matched = false; + SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex); + + SequenceFeaturesI sf = seq.getFeatures(); + for (SequenceFeature feature : sf.getAllFeatures(null)) + { + if (searchPattern.search(feature.type) || (feature.description != null + && searchPattern.search(feature.description))) + { + searchResults.addResult(seq, feature.getBegin(), feature.getEnd()); + matched = true; + } + } + return matched; + } + + /** * Searches for a match with the sequence description, and if found, adds the * sequence to the list of match ids (but not as a duplicate). Answers true if * a match was added, else false. 
@@ -399,12 +590,14 @@ public class Finder implements FinderI * @param searchPattern * @return */ - protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern) + protected boolean searchSequenceDescription(SequenceI seq, + Regex searchPattern) { String desc = seq.getDescription(); - if (desc != null && searchPattern.search(desc) && !idMatches.contains(seq)) + if (desc != null && searchPattern.search(desc) + && !idMatches.contains(seq)) { - idMatches.addElement(seq); + idMatches.add(seq); return true; } return false; @@ -423,45 +616,58 @@ public class Finder implements FinderI { if (searchPattern.search(seq.getName()) && !idMatches.contains(seq)) { - idMatches.addElement(seq); + idMatches.add(seq); return true; } return false; } /** - * Tries to interpret the search string as a residue position, and if valid, + * If the residue position is valid for the sequence, and in a visible column, * adds the position to the search results and returns true, else answers - * false + * false. 
+ * + * @param seq + * @param resNo + * @return */ - protected boolean searchForResidueNumber(SequenceI seq, String searchString) + protected boolean searchForResidueNumber(SequenceI seq, int resNo) { - try + if (seq.getStart() <= resNo && seq.getEnd() >= resNo) { - int res = Integer.parseInt(searchString); - if (seq.getStart() <= res && seq.getEnd() >= res) + if (isVisible(seq, resNo)) { - searchResults.addResult(seq, res, res); + searchResults.addResult(seq, resNo, resNo); return true; } - } catch (NumberFormatException ex) - { } return false; } - /* (non-Javadoc) - * @see jalview.analysis.FinderI#getIdMatch() + /** + * Returns true if the residue is in a visible column, else false + * + * @param seq + * @param res + * @return */ + private boolean isVisible(SequenceI seq, int res) + { + if (!viewport.hasHiddenColumns()) + { + return true; + } + int col = seq.findIndex(res); // base 1 + return viewport.getAlignment().getHiddenColumns().isVisible(col - 1); // base + // 0 + } + @Override - public Vector getIdMatches() + public List getIdMatches() { return idMatches; } - /* (non-Javadoc) - * @see jalview.analysis.FinderI#getSearchResults() - */ @Override public SearchResultsI getSearchResults() {