/*
* Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
* Copyright (C) $$Year-Rel$$ The Jalview Authors
*
* This file is part of Jalview.
*
* Jalview is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* Jalview is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty
* of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Jalview. If not, see <http://www.gnu.org/licenses/>.
* The Jalview Authors are detailed in the 'AUTHORS' file.
*/
package jalview.analysis;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SearchResults;
import jalview.datamodel.SearchResultsI;
import jalview.datamodel.SequenceGroup;
import jalview.datamodel.SequenceI;
import jalview.util.Comparison;
import java.util.Vector;
import com.stevesoft.pat.Regex;
/**
 * Implements the search algorithm for the Find dialog. A search may match
 * residue patterns (reported via {@link #getSearchResults()}) and sequence ids
 * or descriptions (reported via {@link #getIdMatch()}).
 */
public class Finder
{
  /*
   * match residue locations
   */
  private SearchResultsI searchResults;

  /*
   * sequences matched by id or description
   */
  private Vector<SequenceI> idMatch;

  /*
   * the alignment to search over
   */
  private AlignmentI alignment;

  /*
   * (optional) selection to restrict search to
   */
  private SequenceGroup selection;

  /*
   * set true for case-sensitive search (default is false)
   */
  private boolean caseSensitive;

  /*
   * set true to search sequence description (default is false)
   */
  private boolean includeDescription;

  /*
   * set true to return all matches (default is next match only)
   */
  private boolean findAll;

  /*
   * sequence index in alignment to search from
   */
  private int seqIndex;

  /*
   * residue position in sequence to search from, base 1
   * (position of last match for a repeat search)
   */
  private int resIndex;

  /**
   * Constructor to start searching an alignment, optionally restricting
   * results to a selection. The search starts at the first sequence, before
   * its first residue.
   * 
   * @param al
   *          the alignment to search
   * @param sel
   *          the selection to restrict the search to, or null for none
   */
  public Finder(AlignmentI al, SequenceGroup sel)
  {
    this(al, sel, 0, -1);
  }

  /**
   * Constructor to resume search at given sequence and residue on alignment
   * and (optionally) restricted to a selection
   * 
   * @param al
   *          the alignment to search
   * @param sel
   *          the selection to restrict the search to, or null for none
   * @param seqindex
   *          index in the alignment of the sequence to search from
   * @param resindex
   *          residue index to search from; a negative value means 'start of
   *          sequence', so that id / description / residue number searches
   *          are also performed for that sequence
   */
  public Finder(AlignmentI al, SequenceGroup sel, int seqindex,
          int resindex)
  {
    this.alignment = al;
    this.selection = sel;
    this.seqIndex = seqindex;
    this.resIndex = resindex;
  }

  /**
   * Performs a find for the given search string. By default the next match is
   * found, but if setFindAll(true) has been called, then all matches are
   * found. Sequences matched by id or description can be retrieved by
   * getIdMatch(), and matched residue patterns by getSearchResults().
   * 
   * @param theSearchString
   *          the text (regular expression) to search for
   */
  public void find(String theSearchString)
  {
    /*
     * a case-sensitive search must use the pattern exactly as typed; 
     * only a case-insensitive search normalises it to upper case 
     * (Locale.ROOT so that the result does not depend on the default locale)
     */
    String searchString = caseSensitive ? theSearchString
            : theSearchString.toUpperCase(java.util.Locale.ROOT);
    Regex regex = new Regex(searchString);
    regex.setIgnoreCase(!caseSensitive);
    searchResults = new SearchResults();
    idMatch = new Vector<>();

    if (selection != null && selection.getSize() < 1)
    {
      selection = null; // ? ignore column-only selection
    }

    boolean finished = false;
    int end = alignment.getHeight();

    while (!finished && (seqIndex < end))
    {
      SequenceI seq = alignment.getSequenceAt(seqIndex);

      if ((selection != null) && !selection.contains(seq))
      {
        // this sequence is not in the selection - skip to next sequence
        seqIndex++;
        resIndex = -1;
        continue;
      }

      if (resIndex < 0)
      {
        /*
         * at start of sequence; try find by residue number, in sequence id,
         * or (optionally) in sequence description
         */
        resIndex = 0;
        if (doNonMotifSearches(seq, searchString, regex))
        {
          return;
        }
      }

      finished = searchSequenceString(seq, regex) && !findAll;

      if (!finished)
      {
        // nothing (more) to report in this sequence - move to the next one
        seqIndex++;
        resIndex = -1;
      }
    }
  }

  /**
   * Searches the sequence, starting from {@code resIndex}, and adds matches
   * to {@code searchResults}. The search is restricted to the
   * {@code selection} region if there is one. Answers true if any match is
   * added, else false.
   * 
   * @param seq
   *          the sequence to search
   * @param regex
   *          the compiled search pattern
   * @return true if at least one match was added to the search results
   */
  protected boolean searchSequenceString(SequenceI seq, Regex regex)
  {
    /*
     * Restrict search to selected region if there is one
     */
    int seqColStart = 0;
    int seqColEnd = seq.getLength() - 1;
    int residueOffset = 0;
    if (selection != null)
    {
      int selColEnd = selection.getEndRes();
      int selColStart = selection.getStartRes();
      if (selColStart > seqColEnd)
      {
        return false; // sequence doesn't reach selection region
      }
      seqColStart = selColStart;
      seqColEnd = Math.min(seqColEnd, selColEnd);
      residueOffset = seq.findPosition(selection.getStartRes())
              - seq.getStart();
    }
    String seqString = seq.getSequenceAsString(seqColStart, seqColEnd + 1);

    String noGaps = AlignSeq.extractGaps(Comparison.GapChars, seqString);

    SearchResultMatchI lastMatch = null;
    boolean found = false;

    /*
     * resIndex counts residues from the start of the whole sequence, whereas
     * the loop index r is a position in noGaps (the ungapped selected
     * region), so residueOffset has to be removed when converting resIndex
     * to r, and added back when converting a match position to resIndex
     */
    for (int r = Math.max(0, resIndex - residueOffset); r < noGaps
            .length(); r++)
    {
      if (!regex.searchFrom(noGaps, r))
      {
        break;
      }

      int matchedAt = regex.matchedFrom(); // position in noGaps
      resIndex = matchedAt + residueOffset; // add back #residues before
                                            // selection region
      int matchStartPosition = resIndex + seq.getStart();
      int matchEndPosition = matchStartPosition + regex.charsMatched() - 1;

      // do not report a match lying wholly within the previous match
      if (lastMatch == null || !lastMatch.contains(seq, matchStartPosition,
              matchEndPosition))
      {
        lastMatch = searchResults.addResult(seq, matchStartPosition,
                matchEndPosition);
        found = true;
      }
      if (!findAll)
      {
        // leave resIndex just after this match, ready for a 'find next'
        resIndex++;
        return true;
      }

      // the loop increment resumes the search one position after this match
      r = matchedAt;
    }
    return found;
  }

  /**
   * Does searches other than for residue patterns. Currently this includes
   * <ul>
   * <li>find residue by position (if search string is a number)</li>
   * <li>match search string to sequence id</li>
   * <li>match search string to sequence description (optional)</li>
   * </ul>
   * Answers true if a match is found and we are not doing 'find all' (so this
   * search action is complete), else false.
   * 
   * @param seq
   *          the sequence to inspect
   * @param searchString
   *          the search text, tried as a residue number
   * @param regex
   *          the compiled search pattern, tried on id and description
   * @return true if the current (find next) search is complete
   */
  protected boolean doNonMotifSearches(SequenceI seq, String searchString,
          Regex regex)
  {
    if (searchForResidueNumber(seq, searchString) && !findAll)
    {
      return true;
    }
    if (searchSequenceName(seq, regex) && !findAll)
    {
      return true;
    }
    if (searchSequenceDescription(seq, regex) && !findAll)
    {
      return true;
    }
    return false;
  }

  /**
   * Searches for a match with the sequence description, if that option was
   * requested, and if found, adds the sequence to the list of match ids (but
   * not as a duplicate). Answers true if a match was added, else false.
   * 
   * @param seq
   *          the sequence whose description is searched
   * @param regex
   *          the compiled search pattern
   * @return true if the sequence was added to the id matches
   */
  protected boolean searchSequenceDescription(SequenceI seq, Regex regex)
  {
    if (!includeDescription)
    {
      return false;
    }
    String desc = seq.getDescription();
    if (desc != null && regex.search(desc) && !idMatch.contains(seq))
    {
      idMatch.addElement(seq);
      return true;
    }
    return false;
  }

  /**
   * Searches for a match with the sequence name, and if found, adds the
   * sequence to the list of match ids (but not as a duplicate). Answers true
   * if a match was added, else false.
   * 
   * @param seq
   *          the sequence whose name is searched
   * @param regex
   *          the compiled search pattern
   * @return true if the sequence was added to the id matches
   */
  protected boolean searchSequenceName(SequenceI seq, Regex regex)
  {
    if (regex.search(seq.getName()) && !idMatch.contains(seq))
    {
      idMatch.addElement(seq);
      return true;
    }
    return false;
  }

  /**
   * Tries to interpret the search string as a residue position, and if valid
   * (an integer within the sequence's start-end range), adds the position to
   * the search results
   * 
   * @param seq
   *          the sequence to check the position against
   * @param searchString
   *          the search text
   * @return true if a residue position was added to the search results
   */
  protected boolean searchForResidueNumber(SequenceI seq,
          String searchString)
  {
    try
    {
      int res = Integer.parseInt(searchString);
      if (seq.getStart() <= res && seq.getEnd() >= res)
      {
        searchResults.addResult(seq, res, res);
        return true;
      }
    } catch (NumberFormatException ignored)
    {
      // expected whenever the search string is not a number: no match
    }
    return false;
  }

  /**
   * Sets whether the search is case sensitive (default is no)
   * 
   * @param value
   *          true for a case-sensitive search
   */
  public void setCaseSensitive(boolean value)
  {
    this.caseSensitive = value;
  }

  /**
   * Sets whether search returns all matches. Default is to return the next
   * match only.
   * 
   * @param value
   *          true to find all matches
   */
  public void setFindAll(boolean value)
  {
    this.findAll = value;
  }

  /**
   * Returns the (possibly empty) list of sequences matched by id or
   * description. The list is created by find(), so this is null if no search
   * has been performed yet.
   * 
   * @return the sequences matched by the last search
   */
  public Vector<SequenceI> getIdMatch()
  {
    return idMatch;
  }

  /**
   * Returns the residue matches of the last search. The results are created
   * by find(), so this is null if no search has been performed yet.
   * 
   * @return the searchResults
   */
  public SearchResultsI getSearchResults()
  {
    return searchResults;
  }

  /**
   * @return the resIndex
   */
  public int getResIndex()
  {
    return resIndex;
  }

  /**
   * @return the seqIndex
   */
  public int getSeqIndex()
  {
    return seqIndex;
  }

  /**
   * Sets whether search also searches in sequence description text (default
   * is no)
   * 
   * @param value
   *          true to include sequence descriptions in the search
   */
  public void setIncludeDescription(boolean value)
  {
    this.includeDescription = value;
  }
}