X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FFinder.java;fp=src%2Fjalview%2Fanalysis%2FFinder.java;h=d7bf0a270a78cfc627362595b165ddac2380b901;hb=dfc18d64376ae95cfdbf3c112825587a8891179c;hp=191f6e832ae0dd24176b8705d6c65dd09cec9283;hpb=9bc0c6be2b195af9fed245e9e7ae27d73196fac8;p=jalview.git diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index 191f6e8..d7bf0a2 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -28,301 +28,340 @@ import jalview.datamodel.SequenceGroup; import jalview.datamodel.SequenceI; import jalview.util.Comparison; -import java.util.ArrayList; -import java.util.List; import java.util.Vector; import com.stevesoft.pat.Regex; +/** + * Implements the search algorithm for the Find dialog + */ public class Finder { - /** - * Implements the search algorithms for the Find dialog box. + /* + * match residue locations */ - SearchResultsI searchResults; + private SearchResultsI searchResults; - AlignmentI alignment; + /* + * sequences matched by id or description + */ + private Vector idMatch; - SequenceGroup selection = null; + /* + * the alignment to search over + */ + private AlignmentI alignment; - Vector idMatch = null; + /* + * (optional) selection to restrict search to + */ + private SequenceGroup selection; - boolean caseSensitive = false; + /* + * set true for case-sensitive search (default is false) + */ + private boolean caseSensitive; - private boolean includeDescription = false; + /* + * set true to search sequence description (default is false) + */ + private boolean includeDescription; - boolean findAll = false; + /* + * set true to return all matches (default is next match only) + */ + private boolean findAll; - Regex regex = null; + /* + * sequence index in alignment to search from + */ + private int seqIndex; - /** - * holds last-searched position between calls to find(false) + /* + * residue position in sequence to search 
from, base 1 + * (position of last match for a repeat search) */ - int seqIndex = 0, resIndex = -1; + private int resIndex; - public Finder(AlignmentI alignment, SequenceGroup selection) + /** + * Constructor to start searching an alignment, optionally restricting results + * to a selection + * + * @param al + * @param sel + */ + public Finder(AlignmentI al, SequenceGroup sel) { - this.alignment = alignment; - this.selection = selection; + this(al, sel, 0, -1); } /** - * restart search at given sequence and residue on alignment and (optionally) - * contained in selection + * Constructor to resume search at given sequence and residue on alignment and + * (optionally) restricted to a selection * - * @param alignment - * @param selectionGroup - * @param seqIndex - * @param resIndex + * @param al + * @param sel + * @param seqindex + * @param resindex */ - public Finder(AlignmentI alignment, SequenceGroup selectionGroup, - int seqIndex, int resIndex) + public Finder(AlignmentI al, SequenceGroup sel, int seqindex, + int resindex) { - this(alignment, selectionGroup); - this.seqIndex = seqIndex; - this.resIndex = resIndex; + this.alignment = al; + this.selection = sel; + this.seqIndex = seqindex; + this.resIndex = resindex; } - public boolean find(String searchString) + /** + * Performs a find for the given search string. By default the next match is + * found, but if setFindAll(true) has been called, then all matches are found. + * Sequences matched by id or description can be retrieved by getIdMatch(), + * and matched residue patterns by getSearchResults(). + * + * @param theSearchString + * @return + */ + public void find(String theSearchString) { - boolean hasResults = false; - if (!caseSensitive) - { - searchString = searchString.toUpperCase(); - } - regex = new Regex(searchString); + String searchString = caseSensitive ? 
theSearchString.toUpperCase() + : theSearchString; + Regex regex = new Regex(searchString); regex.setIgnoreCase(!caseSensitive); searchResults = new SearchResults(); - idMatch = new Vector(); - String item = null; - boolean found = false; - int end = alignment.getHeight(); - - // ///////////////////////////////////////////// + idMatch = new Vector<>(); - if (selection != null) + if (selection != null && selection.getSize() < 1) { - if ((selection.getSize() < 1) - || ((selection.getEndRes() - selection.getStartRes()) < 2)) - { - selection = null; - } + selection = null; // ? ignore column-only selection } - SearchResultMatchI lastm = null; - while (!found && (seqIndex < end)) + boolean finished = false; + int end = alignment.getHeight(); + + while (!finished && (seqIndex < end)) { SequenceI seq = alignment.getSequenceAt(seqIndex); - if ((selection != null && selection.getSize() > 0) - && !selection.getSequences(null).contains(seq)) + if ((selection != null) && !selection.contains(seq)) { + // this sequence is not in the selection - skip to next sequence seqIndex++; resIndex = -1; - continue; } + if (resIndex < 0) { + /* + * at start of sequence; try find by residue number, in sequence id, + * or (optionally) in sequence description + */ resIndex = 0; - // test for one off matches - sequence position and sequence ID - // //// is the searchString a residue number? 
- try - { - int res = Integer.parseInt(searchString); - // possibly a residue number - check if valid for seq - if (seq.getEnd() >= res) - { - searchResults.addResult(seq, res, res); - hasResults = true; - // resIndex=seq.getLength(); - // seqIndex++; - if (!findAll) - { - found = true; - break; - } - } - } catch (NumberFormatException ex) + if (doNonMotifSearches(seq, searchString, regex)) { + return; } + } - if (regex.search(seq.getName()) && !idMatch.contains(seq)) - { - idMatch.addElement(seq); - hasResults = true; - if (!findAll) - { - // stop and return the match - found = true; - break; - } - } + finished = searchSequenceString(seq, regex) && !findAll; - if (isIncludeDescription() && seq.getDescription() != null - && regex.search(seq.getDescription()) - && !idMatch.contains(seq)) - { - idMatch.addElement(seq); - hasResults = true; - if (!findAll) - { - // stop and return the match - found = true; - break; - } - } + if (!finished) + { + seqIndex++; + resIndex = -1; } - item = seq.getSequenceAsString(); + } + } - if ((selection != null) - && (selection.getEndRes() < alignment.getWidth() - 1)) + /** + * Searches the sequence, starting from resIndex (base 1), and + * adds matches to searchResults. The search is restricted to the + * selection region if there is one. Answers true if any match is + * added, else false. 
+ * + * @param seq + * @param regex + * @return + */ + protected boolean searchSequenceString(SequenceI seq, Regex regex) + { + /* + * Restrict search to selected region if there is one + */ + int seqColStart = 0; + int seqColEnd = seq.getLength() - 1; + int residueOffset = 0; + if (selection != null) + { + int selColEnd = selection.getEndRes(); + int selColStart = selection.getStartRes(); + if (selColStart > seqColEnd) { - item = item.substring(0, selection.getEndRes() + 1); + return false; // sequence doesn't reach selection region } + seqColStart = selColStart; + seqColEnd = Math.min(seqColEnd, selColEnd); + residueOffset = seq.findPosition(selection.getStartRes()) + - seq.getStart(); + } + String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1); - // /Shall we ignore gaps???? - JBPNote: Add Flag for forcing this or not - StringBuilder noGapsSB = new StringBuilder(); - int insertCount = 0; - List spaces = new ArrayList(); + String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString); - for (int j = 0; j < item.length(); j++) - { - if (!Comparison.isGap(item.charAt(j))) - { - noGapsSB.append(item.charAt(j)); - spaces.add(Integer.valueOf(insertCount)); - } - else - { - insertCount++; - } - } + SearchResultMatchI lastMatch = null; + boolean found = false; - String noGaps = noGapsSB.toString(); - for (int r = resIndex; r < noGaps.length(); r++) + for (int r = resIndex; r < noGaps.length(); r++) + { + /* + * searchFrom position is base 0, r is base 1, + * so search is from the position after the r'th residue + */ + if (regex.searchFrom(noGaps, r)) { - - if (regex.searchFrom(noGaps, r)) + resIndex = regex.matchedFrom(); + resIndex += residueOffset; // add back #residues before selection region + int matchStartPosition = resIndex + seq.getStart(); + int matchEndPosition = matchStartPosition + regex.charsMatched() + - 1; + if (lastMatch == null || !lastMatch.contains(seq, + matchStartPosition, matchEndPosition)) { - resIndex = 
regex.matchedFrom(); - - if ((selection != null && selection.getSize() > 0) && (resIndex - + spaces.get(resIndex) < selection.getStartRes())) - { - continue; - } - // if invalid string used, then regex has no matched to/from - int sres = seq.findPosition(resIndex + spaces.get(resIndex)); - int eres = seq.findPosition(regex.matchedTo() - 1 - + (spaces.get(regex.matchedTo() - 1))); - // only add result if not contained in previous result - if (lastm == null || (lastm.getSequence() != seq - || (!(lastm.getStart() <= sres - && lastm.getEnd() >= eres)))) - { - lastm = searchResults.addResult(seq, sres, eres); - } - hasResults = true; - if (!findAll) - { - // thats enough, break and display the result - found = true; - resIndex++; - - break; - } - - r = resIndex; + lastMatch = searchResults.addResult(seq, matchStartPosition, + matchEndPosition); + found = true; } - else + if (!findAll) { - break; + resIndex++; + return true; } + r = resIndex; } - - if (!found) + else { - seqIndex++; - resIndex = -1; + break; } } - - /** - * We now search the Id string in the main search loop. for (int id = 0; id - * < alignment.getHeight(); id++) { if - * (regex.search(alignment.getSequenceAt(id).getName())) { - * idMatch.addElement(alignment.getSequenceAt(id)); hasResults = true; } } - */ - return hasResults; - } - - /** - * @return the alignment - */ - public AlignmentI getAlignment() - { - return alignment; - } - - /** - * @param alignment - * the alignment to set - */ - public void setAlignment(AlignmentI alignment) - { - this.alignment = alignment; + return found; } /** - * @return the caseSensitive + * Does searches other than for residue patterns. Currently this includes + *
<ul> + * <li>find residue by position (if search string is a number)</li> + * <li>match search string to sequence id</li> + * <li>match search string to sequence description (optional)</li> + * </ul>
+ * Answers true if a match is found and we are not doing 'find all' (so this + * search action is complete), else false. + * + * @param seq + * @param searchString + * @param regex + * @return */ - public boolean isCaseSensitive() + protected boolean doNonMotifSearches(SequenceI seq, String searchString, + Regex regex) { - return caseSensitive; + if (searchForResidueNumber(seq, searchString) && !findAll) + { + return true; + } + if (searchSequenceName(seq, regex) && !findAll) + { + return true; + } + if (searchSequenceDescription(seq, regex) && !findAll) + { + return true; + } + return false; } /** - * @param caseSensitive - * the caseSensitive to set + * Searches for a match with the sequence description, if that option was + * requested, and if found, adds the sequence to the list of match ids (but + * not as a duplicate). Answers true if a match was added, else false. + * + * @param seq + * @param regex + * @return */ - public void setCaseSensitive(boolean caseSensitive) + protected boolean searchSequenceDescription(SequenceI seq, Regex regex) { - this.caseSensitive = caseSensitive; + if (!includeDescription) + { + return false; + } + String desc = seq.getDescription(); + if (desc != null && regex.search(desc) && !idMatch.contains(seq)) + { + idMatch.addElement(seq); + return true; + } + return false; } /** - * @return the findAll + * Searches for a match with the sequence name, and if found, adds the + * sequence to the list of match ids (but not as a duplicate). Answers true if + * a match was added, else false. 
+ * + * @param seq + * @param regex + * @return */ - public boolean isFindAll() + protected boolean searchSequenceName(SequenceI seq, Regex regex) { - return findAll; + if (regex.search(seq.getName()) && !idMatch.contains(seq)) + { + idMatch.addElement(seq); + return true; + } + return false; } /** - * @param findAll - * the findAll to set + * Tries to interpret the search string as a residue position, and if valid, + * adds the position to the search results */ - public void setFindAll(boolean findAll) + protected boolean searchForResidueNumber(SequenceI seq, String searchString) { - this.findAll = findAll; + try + { + int res = Integer.parseInt(searchString); + if (seq.getStart() <= res && seq.getEnd() >= res) + { + searchResults.addResult(seq, res, res); + return true; + } + } catch (NumberFormatException ex) + { + } + return false; } /** - * @return the selection + * Sets whether the search is case sensitive (default is no) + * + * @param value */ - public jalview.datamodel.SequenceGroup getSelection() + public void setCaseSensitive(boolean value) { - return selection; + this.caseSensitive = value; } /** - * @param selection - * the selection to set + * Sets whether search returns all matches. Default is to return the next + * match only. 
+ * + * @param value */ - public void setSelection(jalview.datamodel.SequenceGroup selection) + public void setFindAll(boolean value) { - this.selection = selection; + this.findAll = value; } /** @@ -337,14 +376,6 @@ public class Finder } /** - * @return the regex - */ - public com.stevesoft.pat.Regex getRegex() - { - return regex; - } - - /** * @return the searchResults */ public SearchResultsI getSearchResults() @@ -361,15 +392,6 @@ public class Finder } /** - * @param resIndex - * the resIndex to set - */ - public void setResIndex(int resIndex) - { - this.resIndex = resIndex; - } - - /** * @return the seqIndex */ public int getSeqIndex() @@ -378,21 +400,13 @@ public class Finder } /** - * @param seqIndex - * the seqIndex to set + * Sets whether search also searches in sequence description text (default is + * no) + * + * @param value */ - public void setSeqIndex(int seqIndex) - { - this.seqIndex = seqIndex; - } - - public boolean isIncludeDescription() - { - return includeDescription; - } - - public void setIncludeDescription(boolean includeDescription) + public void setIncludeDescription(boolean value) { - this.includeDescription = includeDescription; + this.includeDescription = value; } }