X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FFinder.java;h=de57d691e3bed8679d4cb186b9462cec570a5687;hb=3ba65b98445f192453664357a1df25128fe07c84;hp=d7bf0a270a78cfc627362595b165ddac2380b901;hpb=745806901cdcda04d9b5c496cb5249a03af19971;p=jalview.git diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index d7bf0a2..de57d69 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -26,8 +26,10 @@ import jalview.datamodel.SearchResults; import jalview.datamodel.SearchResultsI; import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; +import jalview.datamodel.VisibleContigsIterator; import jalview.util.Comparison; +import java.util.List; import java.util.Vector; import com.stevesoft.pat.Regex; @@ -38,7 +40,7 @@ import com.stevesoft.pat.Regex; public class Finder { /* - * match residue locations + * matched residue locations */ private SearchResultsI searchResults; @@ -75,13 +77,14 @@ public class Finder /* * sequence index in alignment to search from */ - private int seqIndex; + private int sequenceIndex; /* - * residue position in sequence to search from, base 1 + * column position in sequence to search from, base 0 + * - absolute column number including any hidden columns * (position of last match for a repeat search) */ - private int resIndex; + private int columnIndex; /** * Constructor to start searching an alignment, optionally restricting results @@ -102,15 +105,15 @@ public class Finder * @param al * @param sel * @param seqindex - * @param resindex + * @param colindex */ public Finder(AlignmentI al, SequenceGroup sel, int seqindex, - int resindex) + int colindex) { this.alignment = al; this.selection = sel; - this.seqIndex = seqindex; - this.resIndex = resindex; + this.sequenceIndex = seqindex; + this.columnIndex = colindex; } /** @@ -124,10 +127,16 @@ public class Finder */ public void find(String theSearchString) { - String searchString = caseSensitive ? theSearchString.toUpperCase() - : theSearchString; - Regex regex = new Regex(searchString); - regex.setIgnoreCase(!caseSensitive); + if (findAll) + { + sequenceIndex = 0; + columnIndex = -1; + } + + String searchString = caseSensitive ? theSearchString + : theSearchString.toUpperCase(); + Regex searchPattern = new Regex(searchString); + searchPattern.setIgnoreCase(!caseSensitive); searchResults = new SearchResults(); idMatch = new Vector<>(); @@ -136,115 +145,206 @@ public class Finder selection = null; // ? ignore column-only selection } - boolean finished = false; int end = alignment.getHeight(); - while (!finished && (seqIndex < end)) + while (sequenceIndex < end) { - SequenceI seq = alignment.getSequenceAt(seqIndex); - - if ((selection != null) && !selection.contains(seq)) + SequenceI seq = alignment.getSequenceAt(sequenceIndex); + boolean found = findNext(seq, searchString, searchPattern); + if (found && !findAll) { - // this sequence is not in the selection - skip to next sequence - seqIndex++; - resIndex = -1; - continue; + return; } - - if (resIndex < 0) + if (!found) { - /* - * at start of sequence; try find by residue number, in sequence id, - * or (optionally) in sequence description - */ - resIndex = 0; - if (doNonMotifSearches(seq, searchString, regex)) - { - return; - } + sequenceIndex++; + columnIndex = -1; } + } + } + + /** + * Answers the start-end column range of the visible region starting at or + * after the given column. if there are no hidden columns, this just returns + * the remaining width of the alignment. Answers null if there are no visible + * columns at or after column. + */ + protected int[] getNextVisibleRegion(int column) + { + VisibleContigsIterator visibleRegions = alignment.getHiddenColumns() + .getVisContigsIterator(column, alignment.getWidth(), + false); + return visibleRegions.hasNext() ? visibleRegions.next() : null; + } + + /** + * Finds the next match in the given sequence, starting at column at + * columnIndex. Answers true if a match is found, else false. If + * a match is found, columnIndex is advanced to the column after + * the start of the matched region, ready for a search from the next position. + * + * @param seq + * @param searchString + * @param searchPattern + * @return + */ + protected boolean findNext(SequenceI seq, String searchString, + Regex searchPattern) + { + if (selection != null && !selection.contains(seq)) + { + /* + * this sequence is not in the selection - advance to next sequence + */ + return false; + } - finished = searchSequenceString(seq, regex) && !findAll; + if (columnIndex < 0) + { + /* + * at start of sequence; try find by residue number, in sequence id, + * or (optionally) in sequence description + */ + if (doNonMotifSearches(seq, searchString, searchPattern)) + { + return true; + } + } - if (!finished) + /* + * search for next match in sequence string + */ + int end = seq.getLength(); + while (columnIndex < end) + { + if (searchNextVisibleRegion(seq, searchPattern)) { - seqIndex++; - resIndex = -1; + return true; } } + return false; } /** - * Searches the sequence, starting from resIndex (base 1), and - * adds matches to searchResults. The search is restricted to the - * selection region if there is one. Answers true if any match is - * added, else false. + * Searches the sequence, starting from columnIndex, and adds the + * next match (if any) to searchResults. The search is restricted + * to the next visible column region, and to the selection region + * if there is one. Answers true if a match is added, else false. * * @param seq - * @param regex + * @param searchPattern * @return */ - protected boolean searchSequenceString(SequenceI seq, Regex regex) + protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern) { /* - * Restrict search to selected region if there is one + * sequence columns to search (working in absolute column + * positions, base 0, including any hidden columns) */ - int seqColStart = 0; + int seqColStart = columnIndex; int seqColEnd = seq.getLength() - 1; - int residueOffset = 0; + + /* + * restrict search to (next) visible column region, + * in case there are hidden columns + */ + int[] visible = getNextVisibleRegion(columnIndex); + if (visible != null) + { + seqColStart = Math.max(seqColStart, visible[0]); + seqColEnd = Math.min(seqColEnd, visible[1]); + } + else + { + columnIndex = seqColEnd + 1; + return false; + } + + /* + * restrict search to selected region if there is one + */ if (selection != null) { - int selColEnd = selection.getEndRes(); - int selColStart = selection.getStartRes(); - if (selColStart > seqColEnd) + int selectionStart = selection.getStartRes(); + int selectionEnd = selection.getEndRes(); + if (selectionStart > seqColEnd || selectionEnd < seqColStart) { - return false; // sequence doesn't reach selection region + /* + * sequence region doesn't overlap selection region - + * no match, advance to next visible region + */ + columnIndex = seqColEnd + 1; + return false; } - seqColStart = selColStart; - seqColEnd = Math.min(seqColEnd, selColEnd); - residueOffset = seq.findPosition(selection.getStartRes()) - - seq.getStart(); + seqColStart = Math.max(seqColStart, selectionStart); + seqColEnd = Math.min(seqColEnd, selectionEnd); } - String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1); + String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1); String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString); - SearchResultMatchI lastMatch = null; - boolean found = false; - - for (int r = resIndex; r < noGaps.length(); r++) + if (searchPattern.search(noGaps)) + { + int sequenceStartPosition = seq.findPosition(seqColStart); + recordMatch(seq, searchPattern, sequenceStartPosition); + return true; + } + else { /* - * searchFrom position is base 0, r is base 1, - * so search is from the position after the r'th residue + * no match - advance columnIndex past this visible region + * so the next visible region (if any) is searched next */ - if (regex.searchFrom(noGaps, r)) - { - resIndex = regex.matchedFrom(); - resIndex += residueOffset; // add back #residues before selection region - int matchStartPosition = resIndex + seq.getStart(); - int matchEndPosition = matchStartPosition + regex.charsMatched() - - 1; - if (lastMatch == null || !lastMatch.contains(seq, - matchStartPosition, matchEndPosition)) - { - lastMatch = searchResults.addResult(seq, matchStartPosition, - matchEndPosition); - found = true; - } - if (!findAll) - { - resIndex++; - return true; - } - r = resIndex; - } - else - { - break; - } + columnIndex = seqColEnd + 1; + } + + return false; + } + + /** + * Adds the match held in the searchPattern Regex to the + * searchResults, unless it is a subregion of the last match + * recorded. columnIndex is advanced to the position after the + * start of the matched region, ready for the next search. Answers true if a + * match was added, else false. + * + * @param seq + * @param searchPattern + * @param firstResiduePosition + * @return + */ + protected boolean recordMatch(SequenceI seq, Regex searchPattern, + int firstResiduePosition) + { + /* + * get start/end of the match in sequence coordinates + */ + int offset = searchPattern.matchedFrom(); + int matchStartPosition = firstResiduePosition + offset; + int matchEndPosition = matchStartPosition + + searchPattern.charsMatched() - 1; + + /* + * update columnIndex to next column after the start of the match + * (findIndex returns a value base 1, columnIndex is held base 0) + */ + columnIndex = seq.findIndex(matchStartPosition); + + /* + * check that this match is not a subset of the previous one (JAL-2302) + */ + List matches = searchResults.getResults(); + SearchResultMatchI lastMatch = matches.isEmpty() ? null + : matches.get(matches.size() - 1); + + if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition, + matchEndPosition)) + { + searchResults.addResult(seq, matchStartPosition, matchEndPosition); + return true; } - return found; + + return false; } /** @@ -254,26 +354,30 @@ public class Finder *
  • match search string to sequence id
  • *
  • match search string to sequence description (optional)
  • * - * Answers true if a match is found and we are not doing 'find all' (so this - * search action is complete), else false. + * Answers true if a match is found, else false. * * @param seq * @param searchString - * @param regex + * @param searchPattern * @return */ protected boolean doNonMotifSearches(SequenceI seq, String searchString, - Regex regex) + Regex searchPattern) { - if (searchForResidueNumber(seq, searchString) && !findAll) + /* + * position sequence search to start of sequence + */ + columnIndex = 0; + + if (searchForResidueNumber(seq, searchString)) { return true; } - if (searchSequenceName(seq, regex) && !findAll) + if (searchSequenceName(seq, searchPattern)) { return true; } - if (searchSequenceDescription(seq, regex) && !findAll) + if (includeDescription && searchSequenceDescription(seq, searchPattern)) { return true; } @@ -281,22 +385,18 @@ public class Finder } /** - * Searches for a match with the sequence description, if that option was - * requested, and if found, adds the sequence to the list of match ids (but - * not as a duplicate). Answers true if a match was added, else false. + * Searches for a match with the sequence description, and if found, adds the + * sequence to the list of match ids (but not as a duplicate). Answers true if + * a match was added, else false. * * @param seq - * @param regex + * @param searchPattern * @return */ - protected boolean searchSequenceDescription(SequenceI seq, Regex regex) + protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern) { - if (!includeDescription) - { - return false; - } String desc = seq.getDescription(); - if (desc != null && regex.search(desc) && !idMatch.contains(seq)) + if (desc != null && searchPattern.search(desc) && !idMatch.contains(seq)) { idMatch.addElement(seq); return true; @@ -310,12 +410,12 @@ public class Finder * a match was added, else false. * * @param seq - * @param regex + * @param searchPattern * @return */ - protected boolean searchSequenceName(SequenceI seq, Regex regex) + protected boolean searchSequenceName(SequenceI seq, Regex searchPattern) { - if (regex.search(seq.getName()) && !idMatch.contains(seq)) + if (searchPattern.search(seq.getName()) && !idMatch.contains(seq)) { idMatch.addElement(seq); return true; @@ -325,7 +425,8 @@ public class Finder /** * Tries to interpret the search string as a residue position, and if valid, - * adds the position to the search results + * adds the position to the search results and returns true, else answers + * false */ protected boolean searchForResidueNumber(SequenceI seq, String searchString) { @@ -365,8 +466,8 @@ public class Finder } /** - * Returns the (possibly empty) list of matching sequences (when search - * includes searching sequence names) + * Returns the (possibly empty) list of sequences matched on sequence name or + * description * * @return */ @@ -376,7 +477,9 @@ public class Finder } /** - * @return the searchResults + * Answers the search results (possibly empty) from the last search + * + * @return */ public SearchResultsI getSearchResults() { @@ -384,19 +487,26 @@ public class Finder } /** - * @return the resIndex + * Answers the absolute column position (base 0, including any hidden columns) + * of the start of the last sequence motif (residue pattern) match found. A + * 'Find next' will search from the next position. + * + * @return */ - public int getResIndex() + public int getColumnIndex() { - return resIndex; + return columnIndex; } /** - * @return the seqIndex + * Answers the offset in the alignment (0..) of the sequence in which the last + * match was found (if any) + * + * @return */ - public int getSeqIndex() + public int getSequenceIndex() { - return seqIndex; + return sequenceIndex; } /**