import jalview.datamodel.SearchResultsI;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.VisibleContigsIterator;
import jalview.util.Comparison;
+import java.util.List;
import java.util.Vector;
import com.stevesoft.pat.Regex;
public class Finder
{
/*
- * match residue locations
+ * matched residue locations
*/
private SearchResultsI searchResults;
/*
* sequence index in alignment to search from
*/
- private int seqIndex;
+ private int sequenceIndex;
/*
- * residue position in sequence to search from, base 1
+ * column position in sequence to search from, base 0
+ * - absolute column number including any hidden columns
* (position of last match for a repeat search)
*/
- private int resIndex;
+ private int columnIndex;
/**
* Constructor to start searching an alignment, optionally restricting results
* @param al
* @param sel
* @param seqindex
- * @param resindex
+ * @param colindex
*/
public Finder(AlignmentI al, SequenceGroup sel, int seqindex,
- int resindex)
+ int colindex)
{
this.alignment = al;
this.selection = sel;
- this.seqIndex = seqindex;
- this.resIndex = resindex;
+ this.sequenceIndex = seqindex;
+ this.columnIndex = colindex;
}
/**
*/
public void find(String theSearchString)
{
- String searchString = caseSensitive ? theSearchString.toUpperCase()
- : theSearchString;
- Regex regex = new Regex(searchString);
- regex.setIgnoreCase(!caseSensitive);
+ if (findAll)
+ {
+ sequenceIndex = 0;
+ columnIndex = -1;
+ }
+
+ String searchString = caseSensitive ? theSearchString
+ : theSearchString.toUpperCase();
+ Regex searchPattern = new Regex(searchString);
+ searchPattern.setIgnoreCase(!caseSensitive);
searchResults = new SearchResults();
idMatch = new Vector<>();
selection = null; // ? ignore column-only selection
}
- boolean finished = false;
int end = alignment.getHeight();
- while (!finished && (seqIndex < end))
+ while (sequenceIndex < end)
{
- SequenceI seq = alignment.getSequenceAt(seqIndex);
-
- if ((selection != null) && !selection.contains(seq))
+ SequenceI seq = alignment.getSequenceAt(sequenceIndex);
+ boolean found = findNext(seq, searchString, searchPattern);
+ if (found && !findAll)
{
- // this sequence is not in the selection - skip to next sequence
- seqIndex++;
- resIndex = -1;
- continue;
+ return;
}
-
- if (resIndex < 0)
+ if (!found)
{
- /*
- * at start of sequence; try find by residue number, in sequence id,
- * or (optionally) in sequence description
- */
- resIndex = 0;
- if (doNonMotifSearches(seq, searchString, regex))
- {
- return;
- }
+ sequenceIndex++;
+ columnIndex = -1;
}
+ }
+ }
+
+ /**
+ * Answers the start-end column range of the visible region starting at or
+ * after the given column. if there are no hidden columns, this just returns
+ * the remaining width of the alignment. Answers null if there are no visible
+ * columns at or after <code>column</code>.
+ */
+ protected int[] getNextVisibleRegion(int column)
+ {
+ VisibleContigsIterator visibleRegions = alignment.getHiddenColumns()
+ .getVisContigsIterator(column, alignment.getWidth(),
+ false);
+ return visibleRegions.hasNext() ? visibleRegions.next() : null;
+ }
+
+ /**
+ * Finds the next match in the given sequence, starting at column at
+ * <code>columnIndex</code>. Answers true if a match is found, else false. If
+ * a match is found, <code>columnIndex</code> is advanced to the column after
+ * the start of the matched region, ready for a search from the next position.
+ *
+ * @param seq
+ * @param searchString
+ * @param searchPattern
+ * @return
+ */
+ protected boolean findNext(SequenceI seq, String searchString,
+ Regex searchPattern)
+ {
+ if (selection != null && !selection.contains(seq))
+ {
+ /*
+ * this sequence is not in the selection - advance to next sequence
+ */
+ return false;
+ }
- finished = searchSequenceString(seq, regex) && !findAll;
+ if (columnIndex < 0)
+ {
+ /*
+ * at start of sequence; try find by residue number, in sequence id,
+ * or (optionally) in sequence description
+ */
+ if (doNonMotifSearches(seq, searchString, searchPattern))
+ {
+ return true;
+ }
+ }
- if (!finished)
+ /*
+ * search for next match in sequence string
+ */
+ int end = seq.getLength();
+ while (columnIndex < end)
+ {
+ if (searchNextVisibleRegion(seq, searchPattern))
{
- seqIndex++;
- resIndex = -1;
+ return true;
}
}
+ return false;
}
/**
- * Searches the sequence, starting from <code>resIndex</code> (base 1), and
- * adds matches to <code>searchResults</code>. The search is restricted to the
- * <code>selection</code> region if there is one. Answers true if any match is
- * added, else false.
+ * Searches the sequence, starting from <code>columnIndex</code>, and adds the
+ * next match (if any) to <code>searchResults</code>. The search is restricted
+ * to the next visible column region, and to the <code>selection</code> region
+ * if there is one. Answers true if a match is added, else false.
*
* @param seq
- * @param regex
+ * @param searchPattern
* @return
*/
- protected boolean searchSequenceString(SequenceI seq, Regex regex)
+ protected boolean searchNextVisibleRegion(SequenceI seq, Regex searchPattern)
{
/*
- * Restrict search to selected region if there is one
+ * sequence columns to search (working in absolute column
+ * positions, base 0, including any hidden columns)
*/
- int seqColStart = 0;
+ int seqColStart = columnIndex;
int seqColEnd = seq.getLength() - 1;
- int residueOffset = 0;
+
+ /*
+ * restrict search to (next) visible column region,
+ * in case there are hidden columns
+ */
+ int[] visible = getNextVisibleRegion(columnIndex);
+ if (visible != null)
+ {
+ seqColStart = Math.max(seqColStart, visible[0]);
+ seqColEnd = Math.min(seqColEnd, visible[1]);
+ }
+ else
+ {
+ columnIndex = seqColEnd + 1;
+ return false;
+ }
+
+ /*
+ * restrict search to selected region if there is one
+ */
if (selection != null)
{
- int selColEnd = selection.getEndRes();
- int selColStart = selection.getStartRes();
- if (selColStart > seqColEnd)
+ int selectionStart = selection.getStartRes();
+ int selectionEnd = selection.getEndRes();
+ if (selectionStart > seqColEnd || selectionEnd < seqColStart)
{
- return false; // sequence doesn't reach selection region
+ /*
+ * sequence region doesn't overlap selection region -
+ * no match, advance to next visible region
+ */
+ columnIndex = seqColEnd + 1;
+ return false;
}
- seqColStart = selColStart;
- seqColEnd = Math.min(seqColEnd, selColEnd);
- residueOffset = seq.findPosition(selection.getStartRes())
- - seq.getStart();
+ seqColStart = Math.max(seqColStart, selectionStart);
+ seqColEnd = Math.min(seqColEnd, selectionEnd);
}
- String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1);
+ String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1);
String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);
- SearchResultMatchI lastMatch = null;
- boolean found = false;
-
- for (int r = resIndex; r < noGaps.length(); r++)
+ if (searchPattern.search(noGaps))
+ {
+ int sequenceStartPosition = seq.findPosition(seqColStart);
+ recordMatch(seq, searchPattern, sequenceStartPosition);
+ return true;
+ }
+ else
{
/*
- * searchFrom position is base 0, r is base 1,
- * so search is from the position after the r'th residue
+ * no match - advance columnIndex past this visible region
+ * so the next visible region (if any) is searched next
*/
- if (regex.searchFrom(noGaps, r))
- {
- resIndex = regex.matchedFrom();
- resIndex += residueOffset; // add back #residues before selection region
- int matchStartPosition = resIndex + seq.getStart();
- int matchEndPosition = matchStartPosition + regex.charsMatched()
- - 1;
- if (lastMatch == null || !lastMatch.contains(seq,
- matchStartPosition, matchEndPosition))
- {
- lastMatch = searchResults.addResult(seq, matchStartPosition,
- matchEndPosition);
- found = true;
- }
- if (!findAll)
- {
- resIndex++;
- return true;
- }
- r = resIndex;
- }
- else
- {
- break;
- }
+ columnIndex = seqColEnd + 1;
+ }
+
+ return false;
+ }
+
+ /**
+ * Adds the match held in the <code>searchPattern</code> Regex to the
+ * <code>searchResults</code>, unless it is a subregion of the last match
+ * recorded. <code>columnIndex</code> is advanced to the position after the
+ * start of the matched region, ready for the next search. Answers true if a
+ * match was added, else false.
+ *
+ * @param seq
+ * @param searchPattern
+ * @param firstResiduePosition
+ * @return
+ */
+ protected boolean recordMatch(SequenceI seq, Regex searchPattern,
+ int firstResiduePosition)
+ {
+ /*
+ * get start/end of the match in sequence coordinates
+ */
+ int offset = searchPattern.matchedFrom();
+ int matchStartPosition = firstResiduePosition + offset;
+ int matchEndPosition = matchStartPosition
+ + searchPattern.charsMatched() - 1;
+
+ /*
+ * update columnIndex to next column after the start of the match
+ * (findIndex returns a value base 1, columnIndex is held base 0)
+ */
+ columnIndex = seq.findIndex(matchStartPosition);
+
+ /*
+ * check that this match is not a subset of the previous one (JAL-2302)
+ */
+ List<SearchResultMatchI> matches = searchResults.getResults();
+ SearchResultMatchI lastMatch = matches.isEmpty() ? null
+ : matches.get(matches.size() - 1);
+
+ if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
+ matchEndPosition))
+ {
+ searchResults.addResult(seq, matchStartPosition, matchEndPosition);
+ return true;
}
- return found;
+
+ return false;
}
/**
* <li>match search string to sequence id</li>
* <li>match search string to sequence description (optional)</li>
* </ul>
- * Answers true if a match is found and we are not doing 'find all' (so this
- * search action is complete), else false.
+ * Answers true if a match is found, else false.
*
* @param seq
* @param searchString
- * @param regex
+ * @param searchPattern
* @return
*/
protected boolean doNonMotifSearches(SequenceI seq, String searchString,
- Regex regex)
+ Regex searchPattern)
{
- if (searchForResidueNumber(seq, searchString) && !findAll)
+ /*
+ * position sequence search to start of sequence
+ */
+ columnIndex = 0;
+
+ if (searchForResidueNumber(seq, searchString))
{
return true;
}
- if (searchSequenceName(seq, regex) && !findAll)
+ if (searchSequenceName(seq, searchPattern))
{
return true;
}
- if (searchSequenceDescription(seq, regex) && !findAll)
+ if (includeDescription && searchSequenceDescription(seq, searchPattern))
{
return true;
}
}
/**
- * Searches for a match with the sequence description, if that option was
- * requested, and if found, adds the sequence to the list of match ids (but
- * not as a duplicate). Answers true if a match was added, else false.
+ * Searches for a match with the sequence description, and if found, adds the
+ * sequence to the list of match ids (but not as a duplicate). Answers true if
+ * a match was added, else false.
*
* @param seq
- * @param regex
+ * @param searchPattern
* @return
*/
- protected boolean searchSequenceDescription(SequenceI seq, Regex regex)
+ protected boolean searchSequenceDescription(SequenceI seq, Regex searchPattern)
{
- if (!includeDescription)
- {
- return false;
- }
String desc = seq.getDescription();
- if (desc != null && regex.search(desc) && !idMatch.contains(seq))
+ if (desc != null && searchPattern.search(desc) && !idMatch.contains(seq))
{
idMatch.addElement(seq);
return true;
* a match was added, else false.
*
* @param seq
- * @param regex
+ * @param searchPattern
* @return
*/
- protected boolean searchSequenceName(SequenceI seq, Regex regex)
+ protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
{
- if (regex.search(seq.getName()) && !idMatch.contains(seq))
+ if (searchPattern.search(seq.getName()) && !idMatch.contains(seq))
{
idMatch.addElement(seq);
return true;
/**
* Tries to interpret the search string as a residue position, and if valid,
- * adds the position to the search results
+ * adds the position to the search results and returns true, else answers
+ * false
*/
protected boolean searchForResidueNumber(SequenceI seq, String searchString)
{
}
/**
- * Returns the (possibly empty) list of matching sequences (when search
- * includes searching sequence names)
+ * Returns the (possibly empty) list of sequences matched on sequence name or
+ * description
*
* @return
*/
}
/**
- * @return the searchResults
+ * Answers the search results (possibly empty) from the last search
+ *
+ * @return
*/
public SearchResultsI getSearchResults()
{
}
/**
- * @return the resIndex
+ * Answers the absolute column position (base 0, including any hidden columns)
+ * of the start of the last sequence motif (residue pattern) match found. A
+ * 'Find next' will search from the next position.
+ *
+ * @return
*/
- public int getResIndex()
+ public int getColumnIndex()
{
- return resIndex;
+ return columnIndex;
}
/**
- * @return the seqIndex
+ * Answers the offset in the alignment (0..) of the sequence in which the last
+ * match was found (if any)
+ *
+ * @return
*/
- public int getSeqIndex()
+ public int getSequenceIndex()
{
- return seqIndex;
+ return sequenceIndex;
}
/**