/*
- * Jalview - A Sequence Alignment Editor and Viewer (Version 2.4)
- * Copyright (C) 2008 AM Waterhouse, J Procter, G Barton, M Clamp, S Searle
+ * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
+ * Copyright (C) $$Year-Rel$$ The Jalview Authors
*
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
+ * This file is part of Jalview.
*
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Jalview is free software: you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, either version 3
+ * of the License, or (at your option) any later version.
+ *
+ * Jalview is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
+ * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
+ * The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
-import java.util.*;
+import java.util.Locale;
+
+import jalview.api.AlignViewportI;
+import jalview.api.FinderI;
+import jalview.datamodel.AlignmentI;
+import jalview.datamodel.SearchResultMatchI;
+import jalview.datamodel.SearchResults;
+import jalview.datamodel.SearchResultsI;
+import jalview.datamodel.SequenceGroup;
+import jalview.datamodel.SequenceI;
+import jalview.util.Comparison;
+import jalview.util.MapList;
-import jalview.datamodel.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
-public class Finder
+import com.stevesoft.pat.Regex;
+
+/**
+ * Implements the search algorithm for the Find dialog
+ */
+public class Finder implements FinderI
{
- /**
- * Implements the search algorithms for the Find dialog box.
+ /*
+ * matched residue locations; replaced by a new, empty result set at the
+ * start of each search (see doFind)
+ */
+ private SearchResultsI searchResults;
+
+ /*
+ * sequences matched by id or description; replaced by a new, empty list
+ * at the start of each search (see doFind)
*/
- SearchResults searchResults;
+ private List<SequenceI> idMatches;
- AlignmentI alignment;
+ /*
+ * the viewport to search over
+ */
+ private AlignViewportI viewport;
- jalview.datamodel.SequenceGroup selection = null;
+ /*
+ * sequence index in alignment to search from
+ */
+ private int sequenceIndex;
- Vector idMatch = null;
+ /*
+ * position offset in sequence to search from, base 0
+ * (position after start of last match for a 'find next');
+ * a negative value means the residue number / id / description checks
+ * have not yet been made for the current sequence (see findNextMatch)
+ */
+ private int residueIndex;
- boolean caseSensitive = false;
+ /*
+ * the true sequence position of the start of the
+ * last sequence searched (when 'ignore hidden regions' does not apply);
+ * set to 1 by getSequence when 'ignore hidden regions' applies
+ */
+ private int searchedSequenceStartPosition;
- boolean findAll = false;
+ /*
+ * when 'ignore hidden regions' applies, this holds the mapping from
+ * the visible sequence positions (1, 2, ...) to true sequence positions
+ */
+ private MapList searchedSequenceMap;
- com.stevesoft.pat.Regex regex = null;
+ private String seqToSearch; // ungapped (sub)sequence string currently being searched; set by getSequence
/**
- * hold's last-searched position between calles to find(false)
+ * Constructor for searching a viewport. The search position is initialised
+ * to the first sequence (index 0) with {@code residueIndex} set to -1.
+ *
+ * @param av the viewport supplying the alignment, selection group and
+ * hidden columns that are searched
*/
- int seqIndex = 0, resIndex = 0;
-
- public Finder(AlignmentI alignment, SequenceGroup selection)
+ public Finder(AlignViewportI av)
{
- this.alignment = alignment;
- this.selection = selection;
+ this.viewport = av;
+ this.sequenceIndex = 0;
+ this.residueIndex = -1;
}
- public Finder(AlignmentI alignment, SequenceGroup selectionGroup,
- int seqIndex, int resIndex)
+ @Override
+ public void findAll(String theSearchString, boolean matchCase,
+ boolean searchDescription, boolean ignoreHidden)
{
- this(alignment, selectionGroup);
- this.seqIndex = seqIndex;
- this.resIndex = resIndex;
+ /*
+ * search from the start: first sequence, with residueIndex negative so
+ * that the residue number / id / description checks are made before
+ * the sequence string is searched (see findNextMatch)
+ */
+ sequenceIndex = 0;
+ residueIndex = -1;
+
+ doFind(theSearchString, matchCase, searchDescription, true,
+ ignoreHidden);
+
+ /*
+ * reset to start for next search, so that a following findNext or
+ * findAll begins again at the first sequence
+ */
+ sequenceIndex = 0;
+ residueIndex = -1;
}
- public boolean find(String searchString)
+ @Override
+ public void findNext(String theSearchString, boolean matchCase,
+ boolean searchDescription, boolean ignoreHidden)
{
- boolean hasResults = false;
- if (!caseSensitive)
+ doFind(theSearchString, matchCase, searchDescription, false,
+ ignoreHidden);
+
+ if (searchResults.isEmpty() && idMatches.isEmpty())
{
- searchString = searchString.toUpperCase();
+ /*
+ * search failed (no residue match and no id/description match) -
+ * reset to start so the next search begins at the first sequence
+ */
+ sequenceIndex = 0;
+ residueIndex = -1;
}
- regex = new com.stevesoft.pat.Regex(searchString);
- regex.setIgnoreCase(!caseSensitive);
+ }
+
+ /**
+ * Performs a 'find next' or 'find all'. Any previous search results and id
+ * matches are discarded, then sequences are searched from the current
+ * {@code sequenceIndex} until a match is found (find next) or the end of
+ * the alignment is reached (find all, or no match).
+ *
+ * @param theSearchString the text to search for; it is compiled as a regular
+ * expression
+ * @param matchCase if false, the search string is converted to upper case
+ * and the pattern is set to ignore case
+ * @param searchDescription if true, sequence descriptions are also matched
+ * @param findAll if true, searching continues after a match is found
+ * @param ignoreHidden if true, only visible residues of each sequence are
+ * searched
+ */
+ protected void doFind(String theSearchString, boolean matchCase,
+ boolean searchDescription, boolean findAll, boolean ignoreHidden)
+ {
searchResults = new SearchResults();
- idMatch = new Vector();
- Sequence seq;
- String item = null;
- boolean found = false;
+ idMatches = new ArrayList<>();
- // //// is the searchString a residue number?
- try
- {
- int res = Integer.parseInt(searchString);
- found = true;
- if (selection == null || selection.getSize() < 1)
- {
- seq = (Sequence) alignment.getSequenceAt(0);
- }
- else
- {
- seq = (Sequence) (selection.getSequenceAt(0));
- }
+ String searchString = matchCase ? theSearchString
+ : theSearchString.toUpperCase(Locale.ROOT);
+ Regex searchPattern = new Regex(searchString);
+ searchPattern.setIgnoreCase(!matchCase);
- searchResults.addResult(seq, res, res);
- hasResults = true;
- } catch (NumberFormatException ex)
+ SequenceGroup selection = viewport.getSelectionGroup();
+ if (selection != null && selection.getSize() < 1)
{
+ selection = null; // ? ignore column-only selection (NOTE(review): this local is not read again in this method)
}
- // /////////////////////////////////////////////
-
+ AlignmentI alignment = viewport.getAlignment();
int end = alignment.getHeight();
- if (selection != null)
+ getSequence(ignoreHidden);
+
+ boolean found = false;
+ while ((!found || findAll) && sequenceIndex < end)
{
- if ((selection.getSize() < 1)
- || ((selection.getEndRes() - selection.getStartRes()) < 2))
- {
- selection = null;
- }
+ found = findNextMatch(searchString, searchPattern, searchDescription,
+ ignoreHidden);
}
+ }
- while (!found && (seqIndex < end))
+ /**
+ * Calculates and saves the sequence string to search. The string is
+ * restricted to the current selection region if there is one, and is saved
+ * with all gaps removed. (In this method the selection's column range is
+ * applied only when {@code ignoreHidden} is false; otherwise the string is
+ * obtained from {@code getVisibleSequence}.)
+ * <p>
+ * If option {@code ignoreHidden} is selected, then
+ * only visible positions of the sequence are included, and a mapping is also
+ * constructed from the returned string positions to the true sequence
+ * positions.
+ * <p>
+ * Note we have to do this each time {@code findNext} or {@code findAll} is
+ * called, in case the alignment, selection group or hidden columns have
+ * changed. In particular, if the sequence at offset {@code sequenceIndex} in
+ * the alignment is (no longer) in the selection group, search is advanced to
+ * the next sequence that is.
+ * <p>
+ * Sets sequence string to the empty string if there are no more sequences (in
+ * selection group if any) at or after {@code sequenceIndex}.
+ * <p>
+ * Returns true if a sequence could be found, false if end of alignment was
+ * reached
+ *
+ * @param ignoreHidden if true, the string to search is built from visible
+ * residues only
+ * @return true if a sequence string to search was set up, false if
+ * {@code sequenceIndex} is at or beyond the end of the alignment
+ */
+ private boolean getSequence(boolean ignoreHidden)
+ {
+ AlignmentI alignment = viewport.getAlignment();
+ if (sequenceIndex >= alignment.getHeight())
{
- seq = (Sequence) alignment.getSequenceAt(seqIndex);
-
- if ((selection != null && selection.getSize()>0)
- && !selection.getSequences(null).contains(seq))
+ seqToSearch = "";
+ return false;
+ }
+ SequenceI seq = alignment.getSequenceAt(sequenceIndex);
+ SequenceGroup selection = viewport.getSelectionGroup();
+ if (selection != null && !selection.contains(seq))
+ {
+ if (!nextSequence(ignoreHidden))
{
- seqIndex++;
- resIndex = 0;
-
- continue;
+ return false;
}
+ seq = alignment.getSequenceAt(sequenceIndex);
+ }
- item = seq.getSequenceAsString();
-
- if ((selection != null)
- && (selection.getEndRes() < alignment.getWidth() - 1))
+ String seqString = null;
+ if (ignoreHidden)
+ {
+ seqString = getVisibleSequence(seq);
+ this.searchedSequenceStartPosition = 1; // visible string positions count from 1; addMatch converts them via searchedSequenceMap
+ }
+ else
+ {
+ int startCol = 0;
+ int endCol = seq.getLength() - 1;
+ this.searchedSequenceStartPosition = seq.getStart();
+ if (selection != null)
{
- item = item.substring(0, selection.getEndRes() + 1);
+ startCol = selection.getStartRes();
+ endCol = Math.min(endCol, selection.getEndRes());
+ this.searchedSequenceStartPosition = seq.findPosition(startCol);
}
+ seqString = seq.getSequenceAsString(startCol, endCol + 1);
+ }
- // /Shall we ignore gaps???? - JBPNote: Add Flag for forcing this or not
- StringBuffer noGapsSB = new StringBuffer();
- int insertCount = 0;
- Vector spaces = new Vector();
-
- for (int j = 0; j < item.length(); j++)
- {
- if (!jalview.util.Comparison.isGap(item.charAt(j)))
- {
- noGapsSB.append(item.charAt(j));
- spaces.addElement(new Integer(insertCount));
- }
- else
- {
- insertCount++;
- }
- }
+ /*
+ * remove gaps; note that even if this leaves an empty string, we 'search'
+ * the sequence anyway (for possible match on name or description)
+ */
+ String ungapped = AlignSeq.extractGaps(Comparison.GapChars, seqString);
+ this.seqToSearch = ungapped;
- String noGaps = noGapsSB.toString();
+ return true;
+ }
- for (int r = resIndex; r < noGaps.length(); r++)
+ /**
+ * Returns a string consisting of only the visible residues of {@code seq},
+ * with gaps removed. The visible column ranges are taken from
+ * {@code viewport.getViewAsVisibleContigs(true)} (presumably restricted to
+ * the current selection region if there is one - TODO confirm).
+ * <p>
+ * As a side-effect, also computes and saves (in {@code searchedSequenceMap})
+ * the mapping between the true sequence
+ * positions and the positions (1, 2, ...) of the returned sequence. This is to
+ * allow search matches in the visible sequence to be converted to sequence
+ * positions.
+ *
+ * @param seq the sequence whose visible residues are extracted
+ * @return the visible residues of the sequence, ungapped (may be empty)
+ */
+ private String getVisibleSequence(SequenceI seq)
+ {
+ /*
+ * get start / end columns of sequence and convert to base 0
+ * (so as to match the visible column ranges)
+ */
+ int seqStartCol = seq.findIndex(seq.getStart()) - 1;
+ int seqEndCol = seq.findIndex(seq.getStart() + seq.getLength() - 1) - 1;
+ Iterator<int[]> visibleColumns = viewport.getViewAsVisibleContigs(true);
+ StringBuilder visibleSeq = new StringBuilder(seqEndCol - seqStartCol);
+ List<int[]> fromRanges = new ArrayList<>();
+
+ while (visibleColumns.hasNext())
+ {
+ int[] range = visibleColumns.next();
+ if (range[0] > seqEndCol)
{
-
- if (regex.searchFrom(noGaps, r))
- {
- resIndex = regex.matchedFrom();
-
- if ((selection != null && selection.getSize()>0)
- && ((resIndex + Integer.parseInt(spaces.elementAt(
- resIndex).toString())) < selection.getStartRes()))
- {
- continue;
- }
-
- int sres = seq
- .findPosition(resIndex
- + Integer.parseInt(spaces.elementAt(resIndex)
- .toString()));
- int eres = seq.findPosition(regex.matchedTo()
- - 1
- + Integer.parseInt(spaces
- .elementAt(regex.matchedTo() - 1).toString()));
-
- searchResults.addResult(seq, sres, eres);
- hasResults = true;
- if (!findAll)
- {
- // thats enough, break and display the result
- found = true;
- resIndex++;
-
- break;
- }
-
- r = resIndex;
- }
- else
- {
- break;
- }
+ // visible range starts beyond the end of the sequence: no later range can overlap
+ break;
}
-
- if (!found)
+ if (range[1] < seqStartCol)
{
- seqIndex++;
- resIndex = 0;
+ // visible range ends before the start of the sequence: skip it
+ continue;
}
- }
-
- for (int id = 0; id < alignment.getHeight(); id++)
- {
- if (regex.search(alignment.getSequenceAt(id).getName()))
+ String subseq = seq.getSequenceAsString(range[0], range[1] + 1);
+ String ungapped = AlignSeq.extractGaps(Comparison.GapChars, subseq);
+ visibleSeq.append(ungapped);
+ if (!ungapped.isEmpty())
{
- idMatch.addElement(alignment.getSequenceAt(id));
- hasResults = true;
+ /*
+ * visible region includes at least one non-gap character,
+ * so add the range of true residue positions it covers
+ * to the mapping being constructed
+ */
+ int seqResFrom = seq.findPosition(range[0]);
+ int seqResTo = seqResFrom + ungapped.length() - 1;
+ fromRanges.add(new int[] { seqResFrom, seqResTo });
}
}
- return hasResults;
- }
- /**
- * @return the alignment
- */
- public AlignmentI getAlignment()
- {
- return alignment;
+ /*
+ * construct the mapping; as passed to MapList below (and as read back
+ * with locateInFrom in addMatch) the ranges are
+ * from: true residue positions of the alignment sequence
+ * to: visible sequence positions 1..length
+ */
+ List<int[]> toRange = Arrays
+ .asList(new int[]
+ { 1, visibleSeq.length() });
+ searchedSequenceMap = new MapList(fromRanges, toRange, 1, 1);
+
+ return visibleSeq.toString();
}
/**
- * @param alignment
- * the alignment to set
+ * Advances the search to the next sequence in the alignment. Sequences not in
+ * the current selection group (if there is one) are skipped. The
+ * (sub-)sequence to be searched is extracted, gaps removed, and saved, or set
+ * to the empty string if there are no more sequences to search.
+ * {@code residueIndex} is reset to -1 so that the residue number / id /
+ * description checks are made for the new sequence.
+ * <p>
+ * Returns true if a sequence could be found, false if end of alignment was
+ * reached
+ *
+ * @param ignoreHidden if true, only visible residues are included in the
+ * string to search
+ * @return the result of {@code getSequence} for the new sequence index
*/
- public void setAlignment(AlignmentI alignment)
+ private boolean nextSequence(boolean ignoreHidden)
{
- this.alignment = alignment;
- }
+ sequenceIndex++;
+ residueIndex = -1;
- /**
- * @return the caseSensitive
- */
- public boolean isCaseSensitive()
- {
- return caseSensitive;
+ return getSequence(ignoreHidden);
}
/**
- * @param caseSensitive
- * the caseSensitive to set
+ * Finds the next match in the current sequence string
+ * ({@code seqToSearch}), starting at offset
+ * {@code residueIndex}. Answers true if a match is found, else false.
+ * <p>
+ * If {@code residueIndex} is negative, the residue number / sequence id /
+ * description checks are made first, and a match there also answers true.
+ * <p>
+ * If a match is found, {@code residueIndex} is advanced to the position after
+ * the start of the matched region, ready for the next search.
+ * <p>
+ * If no match is found, {@code sequenceIndex} is advanced ready to search the
+ * next sequence.
+ *
+ * @param searchString the search text (used to test for a residue number)
+ * @param searchPattern the compiled search pattern
+ * @param matchDescription if true, the sequence description is also matched
+ * @param ignoreHidden if true, only visible residues are being searched
+ * @return true if a match was recorded, else false
*/
- public void setCaseSensitive(boolean caseSensitive)
+ protected boolean findNextMatch(String searchString, Regex searchPattern,
+ boolean matchDescription, boolean ignoreHidden)
{
- this.caseSensitive = caseSensitive;
- }
+ if (residueIndex < 0)
+ {
+ /*
+ * at start of sequence; try find by residue number, in sequence id,
+ * or (optionally) in sequence description
+ */
+ if (doNonMotifSearches(searchString, searchPattern, matchDescription))
+ {
+ return true;
+ }
+ }
- /**
- * @return the findAll
- */
- public boolean isFindAll()
- {
- return findAll;
+ /*
+ * search for next match in sequence string; a match that recordMatch
+ * rejects still advances residueIndex, so the loop continues from there
+ */
+ int end = seqToSearch.length();
+ while (residueIndex < end)
+ {
+ boolean matched = searchPattern.searchFrom(seqToSearch, residueIndex);
+ if (matched)
+ {
+ if (recordMatch(searchPattern, ignoreHidden))
+ {
+ return true;
+ }
+ }
+ else
+ {
+ residueIndex = Integer.MAX_VALUE; // no further match in this sequence: forces the loop to exit
+ }
+ }
+
+ nextSequence(ignoreHidden);
+ return false;
}
/**
- * @param findAll
- * the findAll to set
+ * Adds the match held in the <code>searchPattern</code> Regex to the
+ * <code>searchResults</code>, unless it is a subregion of the last match
+ * recorded. <code>residueIndex</code> is advanced to the position after the
+ * start of the matched region, ready for the next search. Answers true if a
+ * match was added, else false.
+ * <p>
+ * Matches that lie entirely within hidden regions of the alignment are not
+ * added.
+ *
+ * @param searchPattern the pattern holding the most recent match
+ * @param ignoreHidden if true, only visible residues were searched
+ * @return true if the match was added to the search results, else false
*/
- public void setFindAll(boolean findAll)
+ protected boolean recordMatch(Regex searchPattern, boolean ignoreHidden)
{
- this.findAll = findAll;
+ SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
+
+ /*
+ * convert start/end of the match to sequence coordinates
+ * NOTE(review): when ignoreHidden is true, getSequence sets
+ * searchedSequenceStartPosition to 1, so these are positions in the
+ * visible sequence string, yet they are passed to allHidden() and
+ * lastMatch.contains() below - confirm this is intended
+ */
+ int offset = searchPattern.matchedFrom();
+ int matchStartPosition = this.searchedSequenceStartPosition + offset;
+ int matchEndPosition = matchStartPosition + searchPattern.charsMatched()
+ - 1;
+
+ /*
+ * update residueIndex to next position after the start of the match
+ * (residueIndex is a base 0 offset into the searched string)
+ */
+ residueIndex = searchPattern.matchedFrom() + 1;
+
+ /*
+ * return false if the match is entirely in a hidden region
+ */
+ if (allHidden(seq, matchStartPosition, matchEndPosition))
+ {
+ return false;
+ }
+
+ /*
+ * check that this match is not a subset of the previous one (JAL-2302)
+ */
+ List<SearchResultMatchI> matches = searchResults.getResults();
+ SearchResultMatchI lastMatch = matches.isEmpty() ? null
+ : matches.get(matches.size() - 1);
+
+ if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
+ matchEndPosition))
+ {
+ addMatch(seq, matchStartPosition, matchEndPosition, ignoreHidden);
+ return true;
+ }
+
+ return false;
}
/**
- * @return the selection
+ * Adds one match to the stored list. If hidden residues are being skipped,
+ * then the match may need to be split into contiguous positions of the
+ * sequence (so it does not include skipped residues).
+ *
+ * @param seq the sequence in which the match was found
+ * @param matchStartPosition start position of the match
+ * @param matchEndPosition end position of the match
+ * @param ignoreHidden if true, the positions are converted using
+ * {@code searchedSequenceMap} before the result is added
*/
- public jalview.datamodel.SequenceGroup getSelection()
+ private void addMatch(SequenceI seq, int matchStartPosition,
+ int matchEndPosition, boolean ignoreHidden)
{
- return selection;
+ if (!ignoreHidden)
+ {
+ /*
+ * simple case: positions are added as a single start-end range
+ */
+ searchResults.addResult(seq, matchStartPosition, matchEndPosition);
+ return;
+ }
+
+ /*
+ * get start-end contiguous ranges in underlying sequence
+ */
+ int[] truePositions = searchedSequenceMap
+ .locateInFrom(matchStartPosition, matchEndPosition);
+ searchResults.addResult(seq, truePositions);
}
/**
- * @param selection
- * the selection to set
+ * Returns true if all residues in the given range are hidden, else false.
+ * Always returns false if the viewport has no hidden columns.
+ *
+ * @param seq the sequence containing the residues
+ * @param fromPos first residue position to check (inclusive)
+ * @param toPos last residue position to check (inclusive)
+ * @return true if no residue in the range is in a visible column
*/
- public void setSelection(jalview.datamodel.SequenceGroup selection)
+ private boolean allHidden(SequenceI seq, int fromPos, int toPos)
{
- this.selection = selection;
+ if (!viewport.hasHiddenColumns())
+ {
+ return false;
+ }
+ for (int res = fromPos; res <= toPos; res++)
+ {
+ if (isVisible(seq, res))
+ {
+ return false;
+ }
+ }
+ return true;
}
/**
- * @return the idMatch
+ * Does searches other than for residue patterns. Currently this includes
+ * <ul>
+ * <li>find residue by position (if search string is a number)</li>
+ * <li>match search string to sequence id</li>
+ * <li>match search string to sequence description (optional)</li>
+ * </ul>
+ * Answers true if a match is found, else false.
+ * <p>
+ * Note that if the search string parses as an integer, only the residue
+ * position search is made, and its result is returned without trying the
+ * sequence id or description. As a side-effect {@code residueIndex} is set
+ * to 0.
+ *
+ * @param searchString the search text, tested for being an integer
+ * @param searchPattern the compiled search pattern
+ * @param includeDescription if true, the sequence description is also
+ * matched
+ * @return true if a match was recorded, else false
*/
- public Vector getIdMatch()
+ protected boolean doNonMotifSearches(String searchString,
+ Regex searchPattern, boolean includeDescription)
{
- return idMatch;
+ SequenceI seq = viewport.getAlignment().getSequenceAt(sequenceIndex);
+
+ /*
+ * position sequence search to start of sequence
+ */
+ residueIndex = 0;
+ try
+ {
+ int res = Integer.parseInt(searchString);
+ return searchForResidueNumber(seq, res);
+ } catch (NumberFormatException ex)
+ {
+ // search pattern is not a number
+ }
+
+ if (searchSequenceName(seq, searchPattern))
+ {
+ return true;
+ }
+ if (includeDescription && searchSequenceDescription(seq, searchPattern))
+ {
+ return true;
+ }
+ return false;
}
/**
- * @return the regex
+ * Searches for a match with the sequence description, and if found, adds the
+ * sequence to the list of match ids (but not as a duplicate). Answers true if
+ * a match was added, else false. A sequence with a null description never
+ * matches.
+ *
+ * @param seq the sequence whose description is tested
+ * @param searchPattern the compiled search pattern
+ * @return true if the sequence was added to the id matches, else false
*/
- public com.stevesoft.pat.Regex getRegex()
+ protected boolean searchSequenceDescription(SequenceI seq,
+ Regex searchPattern)
{
- return regex;
+ String desc = seq.getDescription();
+ if (desc != null && searchPattern.search(desc)
+ && !idMatches.contains(seq))
+ {
+ idMatches.add(seq);
+ return true;
+ }
+ return false;
}
/**
- * @return the searchResults
+ * Searches for a match with the sequence name, and if found, adds the
+ * sequence to the list of match ids (but not as a duplicate). Answers true if
+ * a match was added, else false.
+ *
+ * @param seq the sequence whose name is tested
+ * @param searchPattern the compiled search pattern
+ * @return true if the sequence was added to the id matches, else false
*/
- public SearchResults getSearchResults()
+ protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
{
- return searchResults;
+ if (searchPattern.search(seq.getName()) && !idMatches.contains(seq))
+ {
+ idMatches.add(seq);
+ return true;
+ }
+ return false;
}
/**
- * @return the resIndex
+ * If the residue position is valid for the sequence, and in a visible column,
+ * adds the position to the search results and returns true, else answers
+ * false.
+ *
+ * @param seq the sequence to check
+ * @param resNo the residue position; it is valid if it lies between the
+ * sequence's start and end (inclusive)
+ * @return true if the position was added to the search results, else false
*/
- public int getResIndex()
+ protected boolean searchForResidueNumber(SequenceI seq, int resNo)
{
- return resIndex;
+ if (seq.getStart() <= resNo && seq.getEnd() >= resNo)
+ {
+ if (isVisible(seq, resNo))
+ {
+ searchResults.addResult(seq, resNo, resNo);
+ return true;
+ }
+ }
+ return false;
}
/**
- * @param resIndex
- * the resIndex to set
+ * Returns true if the residue is in a visible column, else false. Always
+ * returns true if the viewport has no hidden columns.
+ *
+ * @param seq the sequence containing the residue
+ * @param res the residue position in the sequence
+ * @return true if the residue's alignment column is visible
*/
- public void setResIndex(int resIndex)
+ private boolean isVisible(SequenceI seq, int res)
{
- this.resIndex = resIndex;
+ if (!viewport.hasHiddenColumns())
+ {
+ return true;
+ }
+ int col = seq.findIndex(res); // base 1
+ return viewport.getAlignment().getHiddenColumns().isVisible(col - 1); // base
+ // 0
}
- /**
- * @return the seqIndex
- */
- public int getSeqIndex()
+ @Override
+ public List<SequenceI> getIdMatches()
{
- return seqIndex;
+ return idMatches; // sequences matched on name or description by the last search (the internal list, not a copy)
}
- /**
- * @param seqIndex
- * the seqIndex to set
- */
- public void setSeqIndex(int seqIndex)
+ @Override
+ public SearchResultsI getSearchResults()
{
- this.seqIndex = seqIndex;
+ return searchResults; // residue matches recorded by the last search
}
}