X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fanalysis%2FFinder.java;h=d7bf0a270a78cfc627362595b165ddac2380b901;hb=dfc18d64376ae95cfdbf3c112825587a8891179c;hp=faf79d154f71c99427d701bb2585c9558f0d0e60;hpb=25aaaa87042b3f507ad4348120df7dd073182759;p=jalview.git diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index faf79d1..d7bf0a2 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -1,6 +1,6 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.8.2b1) - * Copyright (C) 2014 The Jalview Authors + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * @@ -21,324 +21,364 @@ package jalview.analysis; import jalview.datamodel.AlignmentI; +import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SearchResults; -import jalview.datamodel.Sequence; +import jalview.datamodel.SearchResultsI; import jalview.datamodel.SequenceGroup; +import jalview.datamodel.SequenceI; +import jalview.util.Comparison; import java.util.Vector; +import com.stevesoft.pat.Regex; + +/** + * Implements the search algorithm for the Find dialog + */ public class Finder { - /** - * Implements the search algorithms for the Find dialog box. + /* + * match residue locations */ - SearchResults searchResults; + private SearchResultsI searchResults; - AlignmentI alignment; + /* + * sequences matched by id or description + */ + private Vector idMatch; - jalview.datamodel.SequenceGroup selection = null; + /* + * the alignment to search over + */ + private AlignmentI alignment; - Vector idMatch = null; + /* + * (optional) selection to restrict search to + */ + private SequenceGroup selection; - boolean caseSensitive = false; + /* + * set true for case-sensitive search (default is false) + */ + private boolean caseSensitive; - private boolean includeDescription = false; + /* + * set true to search sequence description (default is false) + */ + private boolean includeDescription; - boolean findAll = false; + /* + * set true to return all matches (default is next match only) + */ + private boolean findAll; - com.stevesoft.pat.Regex regex = null; + /* + * sequence index in alignment to search from + */ + private int seqIndex; - /** - * hold's last-searched position between calles to find(false) + /* + * residue position in sequence to search from, base 1 + * (position of last match for a repeat search) */ - int seqIndex = 0, resIndex = -1; + private int resIndex; - public Finder(AlignmentI alignment, SequenceGroup selection) + /** + * Constructor to start searching an alignment, optionally restricting results + * to a selection + * + * @param al + * @param sel + */ + public Finder(AlignmentI al, SequenceGroup sel) { - this.alignment = alignment; - this.selection = selection; + this(al, sel, 0, -1); } /** - * restart search at given sequence and residue on alignment and (optionally) - * contained in selection + * Constructor to resume search at given sequence and residue on alignment and + * (optionally) restricted to a selection * - * @param alignment - * @param selectionGroup - * @param seqIndex - * @param resIndex + * @param al + * @param sel + * @param seqindex + * @param resindex */ - public Finder(AlignmentI alignment, SequenceGroup selectionGroup, - int seqIndex, int resIndex) + public Finder(AlignmentI al, SequenceGroup sel, int seqindex, + int resindex) { - this(alignment, selectionGroup); - this.seqIndex = seqIndex; - this.resIndex = resIndex; + this.alignment = al; + this.selection = sel; + this.seqIndex = seqindex; + this.resIndex = resindex; } - public boolean find(String searchString) + /** + * Performs a find for the given search string. By default the next match is + * found, but if setFindAll(true) has been called, then all matches are found. + * Sequences matched by id or description can be retrieved by getIdMatch(), + * and matched residue patterns by getSearchResults(). + * + * @param theSearchString + * @return + */ + public void find(String theSearchString) { - boolean hasResults = false; - if (!caseSensitive) - { - searchString = searchString.toUpperCase(); - } - regex = new com.stevesoft.pat.Regex(searchString); + String searchString = caseSensitive ? theSearchString.toUpperCase() + : theSearchString; + Regex regex = new Regex(searchString); regex.setIgnoreCase(!caseSensitive); searchResults = new SearchResults(); - idMatch = new Vector(); - Sequence seq; - String item = null; - boolean found = false; - int end = alignment.getHeight(); - - // ///////////////////////////////////////////// + idMatch = new Vector<>(); - if (selection != null) + if (selection != null && selection.getSize() < 1) { - if ((selection.getSize() < 1) - || ((selection.getEndRes() - selection.getStartRes()) < 2)) - { - selection = null; - } + selection = null; // ? ignore column-only selection } - while (!found && (seqIndex < end)) + boolean finished = false; + int end = alignment.getHeight(); + + while (!finished && (seqIndex < end)) { - seq = (Sequence) alignment.getSequenceAt(seqIndex); + SequenceI seq = alignment.getSequenceAt(seqIndex); - if ((selection != null && selection.getSize() > 0) - && !selection.getSequences(null).contains(seq)) + if ((selection != null) && !selection.contains(seq)) { + // this sequence is not in the selection - skip to next sequence seqIndex++; resIndex = -1; - continue; } + if (resIndex < 0) { + /* + * at start of sequence; try find by residue number, in sequence id, + * or (optionally) in sequence description + */ resIndex = 0; - // test for one off matches - sequence position and sequence ID - // //// is the searchString a residue number? - try + if (doNonMotifSearches(seq, searchString, regex)) { - int res = Integer.parseInt(searchString); - // possibly a residue number - check if valid for seq - if (seq.getEnd() >= res) - { - searchResults.addResult(seq, res, res); - hasResults = true; - // resIndex=seq.getLength(); - // seqIndex++; - if (!findAll) - { - found = true; - break; - } - } - } catch (NumberFormatException ex) - { - } - - if (regex.search(seq.getName())) - { - idMatch.addElement(seq); - hasResults = true; - if (!findAll) - { - // stop and return the match - found = true; - break; - } - } - - if (isIncludeDescription() && seq.getDescription() != null - && regex.search(seq.getDescription())) - { - idMatch.addElement(seq); - hasResults = true; - if (!findAll) - { - // stop and return the match - found = true; - break; - } + return; } } - item = seq.getSequenceAsString(); - if ((selection != null) - && (selection.getEndRes() < alignment.getWidth() - 1)) + finished = searchSequenceString(seq, regex) && !findAll; + + if (!finished) { - item = item.substring(0, selection.getEndRes() + 1); + seqIndex++; + resIndex = -1; } + } + } - // /Shall we ignore gaps???? - JBPNote: Add Flag for forcing this or not - StringBuffer noGapsSB = new StringBuffer(); - int insertCount = 0; - Vector spaces = new Vector(); - - for (int j = 0; j < item.length(); j++) + /** + * Searches the sequence, starting from resIndex (base 1), and + * adds matches to searchResults. The search is restricted to the + * selection region if there is one. Answers true if any match is + * added, else false. + * + * @param seq + * @param regex + * @return + */ + protected boolean searchSequenceString(SequenceI seq, Regex regex) + { + /* + * Restrict search to selected region if there is one + */ + int seqColStart = 0; + int seqColEnd = seq.getLength() - 1; + int residueOffset = 0; + if (selection != null) + { + int selColEnd = selection.getEndRes(); + int selColStart = selection.getStartRes(); + if (selColStart > seqColEnd) { - if (!jalview.util.Comparison.isGap(item.charAt(j))) - { - noGapsSB.append(item.charAt(j)); - spaces.addElement(new Integer(insertCount)); - } - else - { - insertCount++; - } + return false; // sequence doesn't reach selection region } + seqColStart = selColStart; + seqColEnd = Math.min(seqColEnd, selColEnd); + residueOffset = seq.findPosition(selection.getStartRes()) + - seq.getStart(); + } + String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1); - String noGaps = noGapsSB.toString(); + String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString); - for (int r = resIndex; r < noGaps.length(); r++) - { + SearchResultMatchI lastMatch = null; + boolean found = false; - if (regex.searchFrom(noGaps, r)) + for (int r = resIndex; r < noGaps.length(); r++) + { + /* + * searchFrom position is base 0, r is base 1, + * so search is from the position after the r'th residue + */ + if (regex.searchFrom(noGaps, r)) + { + resIndex = regex.matchedFrom(); + resIndex += residueOffset; // add back #residues before selection region + int matchStartPosition = resIndex + seq.getStart(); + int matchEndPosition = matchStartPosition + regex.charsMatched() + - 1; + if (lastMatch == null || !lastMatch.contains(seq, + matchStartPosition, matchEndPosition)) { - resIndex = regex.matchedFrom(); - - if ((selection != null && selection.getSize() > 0) - && ((resIndex + Integer.parseInt(spaces.elementAt( - resIndex).toString())) < selection.getStartRes())) - { - continue; - } -// if invalid string used, then regex has no matched to/from - int sres = seq - .findPosition(resIndex - + Integer.parseInt(spaces.elementAt(resIndex) - .toString())); - int eres = seq.findPosition(regex.matchedTo() - - 1 - + Integer.parseInt(spaces - .elementAt(regex.matchedTo() - 1).toString())); - - searchResults.addResult(seq, sres, eres); - hasResults = true; - if (!findAll) - { - // thats enough, break and display the result - found = true; - resIndex++; - - break; - } - - r = resIndex; + lastMatch = searchResults.addResult(seq, matchStartPosition, + matchEndPosition); + found = true; } - else + if (!findAll) { - break; + resIndex++; + return true; } + r = resIndex; } - - if (!found) + else { - seqIndex++; - resIndex = -1; + break; } } - - /** - * We now search the Id string in the main search loop. for (int id = 0; id - * < alignment.getHeight(); id++) { if - * (regex.search(alignment.getSequenceAt(id).getName())) { - * idMatch.addElement(alignment.getSequenceAt(id)); hasResults = true; } } - */ - return hasResults; - } - - /** - * @return the alignment - */ - public AlignmentI getAlignment() - { - return alignment; - } - - /** - * @param alignment - * the alignment to set - */ - public void setAlignment(AlignmentI alignment) - { - this.alignment = alignment; + return found; } /** - * @return the caseSensitive + * Does searches other than for residue patterns. Currently this includes + * + * Answers true if a match is found and we are not doing 'find all' (so this + * search action is complete), else false. + * + * @param seq + * @param searchString + * @param regex + * @return */ - public boolean isCaseSensitive() + protected boolean doNonMotifSearches(SequenceI seq, String searchString, + Regex regex) { - return caseSensitive; + if (searchForResidueNumber(seq, searchString) && !findAll) + { + return true; + } + if (searchSequenceName(seq, regex) && !findAll) + { + return true; + } + if (searchSequenceDescription(seq, regex) && !findAll) + { + return true; + } + return false; } /** - * @param caseSensitive - * the caseSensitive to set + * Searches for a match with the sequence description, if that option was + * requested, and if found, adds the sequence to the list of match ids (but + * not as a duplicate). Answers true if a match was added, else false. + * + * @param seq + * @param regex + * @return */ - public void setCaseSensitive(boolean caseSensitive) + protected boolean searchSequenceDescription(SequenceI seq, Regex regex) { - this.caseSensitive = caseSensitive; + if (!includeDescription) + { + return false; + } + String desc = seq.getDescription(); + if (desc != null && regex.search(desc) && !idMatch.contains(seq)) + { + idMatch.addElement(seq); + return true; + } + return false; } /** - * @return the findAll + * Searches for a match with the sequence name, and if found, adds the + * sequence to the list of match ids (but not as a duplicate). Answers true if + * a match was added, else false. + * + * @param seq + * @param regex + * @return */ - public boolean isFindAll() + protected boolean searchSequenceName(SequenceI seq, Regex regex) { - return findAll; + if (regex.search(seq.getName()) && !idMatch.contains(seq)) + { + idMatch.addElement(seq); + return true; + } + return false; } /** - * @param findAll - * the findAll to set + * Tries to interpret the search string as a residue position, and if valid, + * adds the position to the search results */ - public void setFindAll(boolean findAll) + protected boolean searchForResidueNumber(SequenceI seq, String searchString) { - this.findAll = findAll; + try + { + int res = Integer.parseInt(searchString); + if (seq.getStart() <= res && seq.getEnd() >= res) + { + searchResults.addResult(seq, res, res); + return true; + } + } catch (NumberFormatException ex) + { + } + return false; } /** - * @return the selection + * Sets whether the search is case sensitive (default is no) + * + * @param value */ - public jalview.datamodel.SequenceGroup getSelection() + public void setCaseSensitive(boolean value) { - return selection; + this.caseSensitive = value; } /** - * @param selection - * the selection to set + * Sets whether search returns all matches. Default is to return the next + * match only. + * + * @param value */ - public void setSelection(jalview.datamodel.SequenceGroup selection) + public void setFindAll(boolean value) { - this.selection = selection; + this.findAll = value; } /** - * @return the idMatch + * Returns the (possibly empty) list of matching sequences (when search + * includes searching sequence names) + * + * @return */ - public Vector getIdMatch() + public Vector getIdMatch() { return idMatch; } /** - * @return the regex - */ - public com.stevesoft.pat.Regex getRegex() - { - return regex; - } - - /** * @return the searchResults */ - public SearchResults getSearchResults() + public SearchResultsI getSearchResults() { return searchResults; } @@ -352,15 +392,6 @@ public class Finder } /** - * @param resIndex - * the resIndex to set - */ - public void setResIndex(int resIndex) - { - this.resIndex = resIndex; - } - - /** * @return the seqIndex */ public int getSeqIndex() @@ -369,21 +400,13 @@ public class Finder } /** - * @param seqIndex - * the seqIndex to set + * Sets whether search also searches in sequence description text (default is + * no) + * + * @param value */ - public void setSeqIndex(int seqIndex) - { - this.seqIndex = seqIndex; - } - - public boolean isIncludeDescription() - { - return includeDescription; - } - - public void setIncludeDescription(boolean includeDescription) + public void setIncludeDescription(boolean value) { - this.includeDescription = includeDescription; + this.includeDescription = value; } }