/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.datamodel.VisibleContigsIterator;
import jalview.util.Comparison;

import java.util.List;
import java.util.Locale;
import java.util.Vector;

import com.stevesoft.pat.Regex;
/**
 * Implements the search algorithm for the Find dialog.
 * <p>
 * A search string is tried, per sequence, as
 * <ul>
 * <li>a residue position (when the search string parses as an integer)</li>
 * <li>a regular expression matched against the sequence name</li>
 * <li>(optionally) a regular expression matched against the sequence
 * description</li>
 * <li>a regular expression matched against the ungapped residues of each
 * visible (and, if applicable, selected) column region</li>
 * </ul>
 * Results are held in this object and read back with
 * {@link #getSearchResults()} and {@link #getIdMatch()}. The search position
 * ({@link #getSequenceIndex()}, {@link #getColumnIndex()}) can be passed to a
 * new Finder to resume a 'find next' search.
 */
public class Finder
{
  /*
   * matched residue locations (null until find() has been called)
   */
  private SearchResultsI searchResults;

  /*
   * sequences matched by id or description (null until find() has been called)
   */
  private Vector<SequenceI> idMatch;

  /*
   * the alignment to search over
   */
  private AlignmentI alignment;

  /*
   * (optional) selection to restrict search to
   */
  private SequenceGroup selection;

  /*
   * set true for case-sensitive search (default is false)
   */
  private boolean caseSensitive;

  /*
   * set true to search sequence description (default is false)
   */
  private boolean includeDescription;

  /*
   * set true to return all matches (default is next match only)
   */
  private boolean findAll;

  /*
   * sequence index in alignment to search from
   */
  private int sequenceIndex;

  /*
   * column position in sequence to search from, base 0
   * - absolute column number including any hidden columns
   * (position of last match for a repeat search);
   * a negative value means 'not yet started on the current sequence'
   */
  private int columnIndex;

  /**
   * Constructor to start searching an alignment, optionally restricting
   * results to a selection. The search starts at the first sequence, before
   * its first column.
   * 
   * @param al
   *          the alignment to search
   * @param sel
   *          the selection to restrict the search to, or null for none
   */
  public Finder(AlignmentI al, SequenceGroup sel)
  {
    this(al, sel, 0, -1);
  }

  /**
   * Constructor to resume search at given sequence and residue on alignment
   * and (optionally) restricted to a selection
   * 
   * @param al
   *          the alignment to search
   * @param sel
   *          the selection to restrict the search to, or null for none
   * @param seqindex
   *          index in the alignment of the sequence to start searching from
   * @param colindex
   *          column position to start searching from; a negative value also
   *          triggers the residue number / name / description searches for
   *          the first sequence searched
   */
  public Finder(AlignmentI al, SequenceGroup sel, int seqindex,
          int colindex)
  {
    this.alignment = al;
    this.selection = sel;
    this.sequenceIndex = seqindex;
    this.columnIndex = colindex;
  }

  /**
   * Performs a find for the given search string. By default the next match is
   * found, but if setFindAll(true) has been called, then all matches are
   * found (and the search always restarts from the first sequence).
   * Sequences matched by id or description can be retrieved by getIdMatch(),
   * and matched residue patterns by getSearchResults(). Both result holders
   * are replaced by new, empty ones on each call.
   * <p>
   * Unless the search is case sensitive, the search string is converted to
   * upper case before use.
   * 
   * @param theSearchString
   *          the text, residue number or regular expression to search for
   */
  public void find(String theSearchString)
  {
    if (findAll)
    {
      sequenceIndex = 0;
      columnIndex = -1;
    }

    /*
     * Locale.ROOT keeps the case conversion independent of the default
     * locale (in a Turkish locale, for example, "i" does not convert to "I")
     */
    String searchString = caseSensitive ? theSearchString
            : theSearchString.toUpperCase(Locale.ROOT);
    Regex searchPattern = new Regex(searchString);
    searchPattern.setIgnoreCase(!caseSensitive);

    searchResults = new SearchResults();
    idMatch = new Vector<>();

    if (selection != null && selection.getSize() < 1)
    {
      selection = null; // ? ignore column-only selection
    }

    int end = alignment.getHeight();

    while (sequenceIndex < end)
    {
      SequenceI seq = alignment.getSequenceAt(sequenceIndex);
      boolean found = findNext(seq, searchString, searchPattern);
      if (found && !findAll)
      {
        /*
         * 'find next' mode - stop at the first match, leaving
         * sequenceIndex and columnIndex ready for a resumed search
         */
        return;
      }
      if (!found)
      {
        /*
         * nothing (more) in this sequence - move to the start of the next
         */
        sequenceIndex++;
        columnIndex = -1;
      }
    }
  }

  /**
   * Answers the start-end column range of the visible region starting at or
   * after the given column. If there are no hidden columns, this just returns
   * the remaining width of the alignment. Answers null if there are no
   * visible columns at or after {@code column}.
   * 
   * @param column
   *          absolute column position (base 0) to look from
   * @return a range whose first element is the start column and second
   *         element the end column, or null
   */
  protected int[] getNextVisibleRegion(int column)
  {
    VisibleContigsIterator visibleRegions = alignment.getHiddenColumns()
            .getVisContigsIterator(column, alignment.getWidth(), false);
    return visibleRegions.hasNext() ? visibleRegions.next() : null;
  }

  /**
   * Finds the next match in the given sequence, starting at column
   * {@code columnIndex}. Answers true if a match is found, else false. If a
   * residue pattern match is found, {@code columnIndex} is advanced to the
   * column after the start of the matched region, ready for a search from the
   * next position.
   * <p>
   * If {@code columnIndex} is negative on entry (start of a sequence), the
   * residue number, sequence name and (optionally) description searches are
   * tried first.
   * 
   * @param seq
   *          the sequence to search
   * @param searchString
   *          the search string (as used for the residue number search)
   * @param searchPattern
   *          the compiled search pattern
   * @return true if any kind of match was found
   */
  protected boolean findNext(SequenceI seq, String searchString,
          Regex searchPattern)
  {
    if (selection != null && !selection.contains(seq))
    {
      /*
       * this sequence is not in the selection - advance to next sequence
       */
      return false;
    }

    if (columnIndex < 0)
    {
      /*
       * at start of sequence; try find by residue number, in sequence id,
       * or (optionally) in sequence description
       */
      if (doNonMotifSearches(seq, searchString, searchPattern))
      {
        return true;
      }
    }

    /*
     * search for next match in sequence string; each failed call to
     * searchNextVisibleRegion moves columnIndex past the region it tried
     */
    int end = seq.getLength();
    while (columnIndex < end)
    {
      if (searchNextVisibleRegion(seq, searchPattern))
      {
        return true;
      }
    }
    return false;
  }

  /**
   * Searches the sequence, starting from {@code columnIndex}, and adds the
   * next match (if any) to {@code searchResults}. The search is restricted to
   * the next visible column region, and to the {@code selection} region if
   * there is one. Answers true if the pattern matched, else false (in which
   * case {@code columnIndex} has been moved past the region searched).
   * 
   * @param seq
   *          the sequence to search
   * @param searchPattern
   *          the compiled search pattern
   * @return true if the pattern matched in the region searched
   */
  protected boolean searchNextVisibleRegion(SequenceI seq,
          Regex searchPattern)
  {
    /*
     * sequence columns to search (working in absolute column
     * positions, base 0, including any hidden columns)
     */
    int seqColStart = columnIndex;
    int seqColEnd = seq.getLength() - 1;

    /*
     * restrict search to (next) visible column region,
     * in case there are hidden columns
     */
    int[] visible = getNextVisibleRegion(columnIndex);
    if (visible == null)
    {
      /*
       * no more visible columns - position past the end of the sequence
       */
      columnIndex = seqColEnd + 1;
      return false;
    }
    seqColStart = Math.max(seqColStart, visible[0]);
    seqColEnd = Math.min(seqColEnd, visible[1]);

    /*
     * restrict search to selected region if there is one
     */
    if (selection != null)
    {
      int selectionStart = selection.getStartRes();
      int selectionEnd = selection.getEndRes();
      if (selectionStart > seqColEnd || selectionEnd < seqColStart)
      {
        /*
         * sequence region doesn't overlap selection region -
         * no match, advance to next visible region
         */
        columnIndex = seqColEnd + 1;
        return false;
      }
      seqColStart = Math.max(seqColStart, selectionStart);
      seqColEnd = Math.min(seqColEnd, selectionEnd);
    }

    /*
     * the pattern is matched against the residues only (gaps removed)
     */
    String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1);
    String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);

    if (searchPattern.search(noGaps))
    {
      int sequenceStartPosition = seq.findPosition(seqColStart);
      recordMatch(seq, searchPattern, sequenceStartPosition);
      return true;
    }

    /*
     * no match - advance columnIndex past this visible region
     * so the next visible region (if any) is searched next
     */
    columnIndex = seqColEnd + 1;
    return false;
  }

  /**
   * Adds the match held in the {@code searchPattern} Regex to the
   * {@code searchResults}, unless it is a subregion of the last match
   * recorded. {@code columnIndex} is advanced to the position after the start
   * of the matched region, ready for the next search. Answers true if a match
   * was added, else false.
   * 
   * @param seq
   *          the sequence in which the match was found
   * @param searchPattern
   *          the pattern holding the details of its latest match
   * @param firstResiduePosition
   *          sequence position of the first residue of the string searched
   * @return true if the match was added to the search results
   */
  protected boolean recordMatch(SequenceI seq, Regex searchPattern,
          int firstResiduePosition)
  {
    /*
     * get start/end of the match in sequence coordinates
     */
    int offset = searchPattern.matchedFrom();
    int matchStartPosition = firstResiduePosition + offset;
    int matchEndPosition = matchStartPosition
            + searchPattern.charsMatched() - 1;

    /*
     * update columnIndex to next column after the start of the match
     * (findIndex returns a value base 1, columnIndex is held base 0)
     */
    columnIndex = seq.findIndex(matchStartPosition);

    /*
     * check that this match is not a subset of the previous one (JAL-2302)
     */
    List<SearchResultMatchI> matches = searchResults.getResults();
    SearchResultMatchI lastMatch = matches.isEmpty() ? null
            : matches.get(matches.size() - 1);

    if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
            matchEndPosition))
    {
      searchResults.addResult(seq, matchStartPosition, matchEndPosition);
      return true;
    }

    return false;
  }

  /**
   * Does searches other than for residue patterns. Currently this includes
   * <ul>
   * <li>find residue by position (if search string is a number)</li>
   * <li>match search string to sequence id</li>
   * <li>match search string to sequence description (optional)</li>
   * </ul>
   * The searches are tried in that order and stop at the first that succeeds.
   * As a side effect {@code columnIndex} is set to 0, so that the residue
   * pattern search then starts from the first column. Answers true if a match
   * is found, else false.
   * 
   * @param seq
   *          the sequence to search
   * @param searchString
   *          the search string, tried as a residue number
   * @param searchPattern
   *          the compiled search pattern, tried on name and description
   * @return true if one of the searches found a match
   */
  protected boolean doNonMotifSearches(SequenceI seq, String searchString,
          Regex searchPattern)
  {
    /*
     * position sequence search to start of sequence
     */
    columnIndex = 0;

    if (searchForResidueNumber(seq, searchString))
    {
      return true;
    }
    if (searchSequenceName(seq, searchPattern))
    {
      return true;
    }
    if (includeDescription && searchSequenceDescription(seq, searchPattern))
    {
      return true;
    }
    return false;
  }

  /**
   * Searches for a match with the sequence description, and if found, adds
   * the sequence to the list of match ids (but not as a duplicate). Answers
   * true if a match was added, else false. A sequence with no description
   * never matches.
   * 
   * @param seq
   *          the sequence whose description is searched
   * @param searchPattern
   *          the compiled search pattern
   * @return true if the sequence was added to the id matches
   */
  protected boolean searchSequenceDescription(SequenceI seq,
          Regex searchPattern)
  {
    String desc = seq.getDescription();
    if (desc != null && searchPattern.search(desc)
            && !idMatch.contains(seq))
    {
      idMatch.addElement(seq);
      return true;
    }
    return false;
  }

  /**
   * Searches for a match with the sequence name, and if found, adds the
   * sequence to the list of match ids (but not as a duplicate). Answers true
   * if a match was added, else false. A sequence with no name never matches.
   * 
   * @param seq
   *          the sequence whose name is searched
   * @param searchPattern
   *          the compiled search pattern
   * @return true if the sequence was added to the id matches
   */
  protected boolean searchSequenceName(SequenceI seq, Regex searchPattern)
  {
    /*
     * null check for consistency with the description search
     */
    String name = seq.getName();
    if (name != null && searchPattern.search(name)
            && !idMatch.contains(seq))
    {
      idMatch.addElement(seq);
      return true;
    }
    return false;
  }

  /**
   * Tries to interpret the search string as a residue position, and if it is
   * a number within the sequence's start-end range, adds the position to the
   * search results and returns true, else answers false
   * 
   * @param seq
   *          the sequence to look in
   * @param searchString
   *          the search string, possibly a residue number
   * @return true if a residue position was added to the search results
   */
  protected boolean searchForResidueNumber(SequenceI seq,
          String searchString)
  {
    try
    {
      int res = Integer.parseInt(searchString);
      if (seq.getStart() <= res && seq.getEnd() >= res)
      {
        searchResults.addResult(seq, res, res);
        return true;
      }
    } catch (NumberFormatException ignored)
    {
      /*
       * search string is not a number, so not a residue position;
       * this is an expected outcome, not an error
       */
    }
    return false;
  }

  /**
   * Sets whether the search is case sensitive (default is no)
   * 
   * @param value
   *          true for a case-sensitive search
   */
  public void setCaseSensitive(boolean value)
  {
    this.caseSensitive = value;
  }

  /**
   * Sets whether search returns all matches. Default is to return the next
   * match only.
   * 
   * @param value
   *          true to find all matches
   */
  public void setFindAll(boolean value)
  {
    this.findAll = value;
  }

  /**
   * Returns the (possibly empty) list of sequences matched on sequence name
   * or description by the last search, or null if find() has not yet been
   * called
   * 
   * @return the sequences matched by name or description
   */
  public Vector<SequenceI> getIdMatch()
  {
    return idMatch;
  }

  /**
   * Answers the search results (possibly empty) from the last search, or null
   * if find() has not yet been called
   * 
   * @return the residue matches
   */
  public SearchResultsI getSearchResults()
  {
    return searchResults;
  }

  /**
   * Answers the absolute column position (base 0, including any hidden
   * columns) from which a 'Find next' would continue. After a sequence motif
   * (residue pattern) match this is the column following the start of the
   * match; it is 0 after a residue number, name or description match, and is
   * reset to -1 whenever the search moves on to another sequence.
   * 
   * @return the current search column
   */
  public int getColumnIndex()
  {
    return columnIndex;
  }

  /**
   * Answers the offset in the alignment (0..) of the sequence in which the
   * last match was found (if any). If the search ran through all sequences
   * (always the case for 'find all'), this is the alignment height as read at
   * the start of the search.
   * 
   * @return the current search sequence index
   */
  public int getSequenceIndex()
  {
    return sequenceIndex;
  }

  /**
   * Sets whether search also searches in sequence description text (default
   * is no)
   * 
   * @param value
   *          true to include sequence descriptions in the search
   */
  public void setIncludeDescription(boolean value)
  {
    this.includeDescription = value;
  }
}