private Vector<SequenceI> idMatches;
/*
- * the viewport to search over
+ * the viewport to search over, if known
+ * (may restrict search by selection group, or for hidden columns)
*/
private AlignViewportI viewport;
/*
+ * the alignment to search over
+ */
+ private AlignmentI alignment;
+
+ /*
* sequence index in alignment to search from
*/
private int sequenceIndex;
*/
public Finder(AlignViewportI av)
{
+ this(av, av.getAlignment()); // search the alignment held by the viewport
+ }
+
+ /**
+ * Constructor given an alignment to search. No viewport is set, so the
+ * search is not restricted to any selection group.
+ *
+ * @param align
+ *          the alignment to search
+ */
+ public Finder(AlignmentI align)
+ {
+ this(null, align); // null viewport
+ }
+
+ private Finder(AlignViewportI av, AlignmentI align)
+ {
+ this.alignment = align;
this.viewport = av;
this.sequenceIndex = 0;
this.columnIndex = -1;
searchResults = new SearchResults();
idMatches = new Vector<>();
- SequenceGroup selection = viewport.getSelectionGroup();
+ SequenceGroup selection = viewport == null ? null
+ : viewport.getSelectionGroup();
if (selection != null && selection.getSize() < 1)
{
selection = null; // ? ignore column-only selection
}
- AlignmentI alignment = viewport.getAlignment();
int end = alignment.getHeight();
while (sequenceIndex < end)
* restrict search to (next) visible column region,
* in case there are hidden columns
*/
- AlignmentI alignment = viewport.getAlignment();
VisibleContigsIterator visibleRegions = alignment.getHiddenColumns()
.getVisContigsIterator(column, alignment.getWidth(),
false);
/*
* restrict search to selected region if there is one
*/
- SequenceGroup selection = viewport.getSelectionGroup();
+ SequenceGroup selection = viewport == null ? null
+ : viewport.getSelectionGroup();
if (selection != null)
{
int selectionStart = selection.getStartRes();
protected boolean findNextMatch(SequenceI seq, String searchString,
Regex searchPattern, boolean matchDescription)
{
- SequenceGroup selection = viewport.getSelectionGroup();
+ SequenceGroup selection = viewport == null ? null
+ : viewport.getSelectionGroup();
if (selection != null && !selection.contains(seq))
{
/*
package jalview.io;
import jalview.analysis.AlignmentUtils;
+import jalview.analysis.Finder;
import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignViewportI;
import jalview.api.FeatureColourI;
import jalview.api.FeatureRenderer;
import jalview.api.FeaturesSourceI;
+import jalview.api.FinderI;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
import jalview.datamodel.MappedFeatures;
+import jalview.datamodel.SearchResultMatchI;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
private static final String ENDFILTERS = "ENDFILTERS";
+ private static final String STARTMOTIFS = "STARTMOTIFS";
+
+ private static final String ENDMOTIFS = "ENDMOTIFS";
+
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
private static final String NOTE = "Note";
while ((line = nextLine()) != null)
{
+ line = line.trim();
// skip comments/process pragmas
if (line.length() == 0 || line.startsWith("#"))
{
if (gffColumns.length > 0 && gffColumns.length < 4)
{
/*
- * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
- * a feature type colour specification
+ * if 2 or 3 tokens, we anticipate one of
+ * STARTGROUP ENDGROUP STARTFILTERS STARTMOTIFS
+ * or a feature type and colour specification
*/
String ft = gffColumns[0];
if (ft.equalsIgnoreCase(STARTFILTERS))
parseFilters(filters);
continue;
}
+ if (ft.equalsIgnoreCase(STARTMOTIFS))
+ {
+ parseMotifs(align, featureGroup);
+ continue;
+ }
if (ft.equalsIgnoreCase(STARTGROUP))
{
featureGroup = gffColumns[1];
}
else if (ft.equalsIgnoreCase(ENDGROUP))
{
- // We should check whether this is the current group,
- // but at present there's no way of showing more than 1 group
featureGroup = null;
}
else
}
/*
- * if not a comment, GFF pragma, startgroup, endgroup or feature
- * colour specification, that just leaves a feature details line
+ * if not handled above, that just leaves a feature details line
* in either Jalview or GFF format
*/
if (gffVersion == 0)
}
/**
+ * Reads lines up to and including the next ENDMOTIFS line (or to the end of
+ * input if there is none), and processes each one by
+ * <ul>
+ * <li>searching the alignment for the supplied motif (first column)</li>
+ * <li>creating features for matches, with feature type, description and
+ * (optionally) score given in the remaining columns</li>
+ * </ul>
+ * A line which does not have 3 or 4 tab-separated tokens is reported on
+ * stderr and skipped. A score which cannot be parsed is reported on stderr
+ * and the features for that line are created with a score of 0.
+ *
+ * @param alignment
+ *          the alignment whose sequences are searched and given features
+ * @param featureGroup
+ *          the feature group assigned to created features (may be null)
+ * @throws IOException
+ */
+ protected void parseMotifs(AlignmentI alignment, String featureGroup)
+     throws IOException
+ {
+   FinderI finder = new Finder(alignment);
+   String line;
+   while ((line = nextLine()) != null)
+   {
+     /*
+      * case-insensitive prefix match which does not depend on the
+      * default locale (as toUpperCase() would)
+      */
+     if (line.regionMatches(true, 0, ENDMOTIFS, 0, ENDMOTIFS.length()))
+     {
+       return;
+     }
+     String[] tokens = line.split(TAB_REGEX);
+     if (tokens.length != 3 && tokens.length != 4)
+     {
+       System.err.println(String.format("Invalid token count %d for %s",
+               tokens.length, line));
+       // skip the malformed line; indexing tokens below could otherwise
+       // throw ArrayIndexOutOfBoundsException
+       continue;
+     }
+     String motif = tokens[0];
+     String featureType = tokens[1];
+     String description = tokens[2];
+     float score = 0f;
+     if (tokens.length > 3)
+     {
+       try
+       {
+         score = Float.parseFloat(tokens[3]);
+       } catch (NumberFormatException e)
+       {
+         // report, and carry on with the default score of 0
+         System.err.println("Invalid score in " + line);
+       }
+     }
+     finder.findAll(motif, true, false);
+     List<SearchResultMatchI> matches = finder.getSearchResults()
+             .getResults();
+     for (SearchResultMatchI match : matches)
+     {
+       SequenceFeature sf = new SequenceFeature(featureType, description,
+               match.getStart(), match.getEnd(), score, featureGroup);
+       match.getSequence().addSequenceFeature(sf);
+     }
+   }
+ }
+
+ /**
* Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
* filter to the map for each line parsed. After exit from this method,
* nextLine() should return the line after ENDFILTERS (or we are already at
String[] tokens = line.split(TAB_REGEX);
if (tokens.length != 2)
{
- System.err.println(String.format("Invalid token count %d for %d",
+ System.err.println(String.format("Invalid token count %d for %s",
tokens.length, line));
}
else
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertNull;
import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
public class FeaturesFileTest
{
+ private static final String TAB = "\t";
private static String simpleGffFile = "examples/testdata/simpleGff3.gff";
@AfterClass(alwaysRun = true)
+ "ENDGROUP\tgrp2\n";
assertEquals(expected, exported);
}
+
+ /**
+  * Test parsing a features file with Jalview format features, including
+  * STARTMOTIFS/ENDMOTIFS lines with motifs to be matched to create features
+  *
+  * @throws IOException
+  */
+ @Test(groups = { "Functional" })
+ public void testParse_jalviewFeaturesWithMotifs() throws IOException
+ {
+   File f = new File("examples/uniref50.fa");
+   AlignmentI al = readAlignmentFile(f);
+   AlignFrame af = new AlignFrame(al, 500, 500);
+   Map<String, FeatureColourI> colours = af.getFeatureRenderer()
+           .getFeatureColours();
+
+   /*
+    * hide columns with YKV motif; these should not get
+    * matched by the Finder
+    */
+   al.getHiddenColumns().hideColumns(62, 64);
+
+   // @formatter:off
+   String featureData =
+           /*
+            * features in the null group
+            */
+           "HELIX" + TAB + "blue\n" +
+           "MOTIF1" + TAB + "green\n" +
+           "MOTIF2" + TAB + "250,200,150|100,50,0|-3.9|4.5|above|-2.0\n" +
+           "adescription" + TAB + "FER_CAPAN" + TAB + "-1" + TAB + "42" + TAB + "45" + TAB + "HELIX\n" +
+           "STARTMOTIFS\n" +
+           "FLP" + TAB + "MOTIF1" + TAB + "flxMotifP\n" +
+           "F[LR]N" + TAB + "MOTIF1" + TAB + "flxMotifN\n" +
+           "fld" + TAB + "MOTIF1" + TAB + "flxMotifD\n" +
+           "YKV" + TAB + "MOTIF1" + TAB + "ykvMotif\n" +
+           "ENDMOTIFS\n" +
+           /*
+            * features in group uniprot
+            */
+           "STARTGROUP" + TAB + "uniprot\n" +
+           "bdescription" + TAB + "FER_CAPAN" + TAB + "-1" + TAB + "47" + TAB + "48" + TAB + "HELIX\n" +
+           "STARTMOTIFS\n" +
+           "FLG" + TAB + "MOTIF1" + TAB + "flxMotifG\n" +
+           "VTT" + TAB + "MOTIF2" + TAB + "vxtMotifT" + TAB + "-3.21\n" +
+           "VRT" + TAB + "MOTIF2" + TAB + "vxtMotifR\n" +
+           "ENDMOTIFS\n" +
+           "ENDGROUP";
+   // @formatter:on
+   FeaturesFile featuresFile = new FeaturesFile(featureData,
+           DataSourceType.PASTE);
+   assertTrue("Failed to parse features file",
+           featuresFile.parse(al, colours, true));
+
+   // verify HELIX features were parsed as normal
+   List<SequenceFeature> sfs = al.getSequenceAt(1).findFeatures(0, 999,
+           "HELIX");
+   assertEquals(2, sfs.size());
+   SequenceFeature sf = sfs.get(0);
+   assertNull(sf.getFeatureGroup());
+   assertEquals(42, sf.getBegin());
+   assertEquals(45, sf.getEnd());
+   assertEquals("adescription", sf.getDescription());
+   sf = sfs.get(1);
+   assertEquals("uniprot", sf.getFeatureGroup());
+   assertEquals(47, sf.getBegin());
+   assertEquals(48, sf.getEnd());
+   assertEquals("bdescription", sf.getDescription());
+
+   /*
+    * feature type MOTIF1
+    * FLP motif should match FER1_SOLLC/13-15 and Q93XJ9_SOLTU/13-15
+    * F[LR]N should match O80429_MAIZE/107-109
+    * fld should match nothing (as case sensitive)
+    * feature group should be null for the above
+    * FLG should match FER1_PEA/36-38, feature group uniprot
+    * YKV should match nothing as entirely within hidden columns
+    */
+   for (SequenceI seq : al.getSequences())
+   {
+     List<SequenceFeature> features = seq.findFeatures(0, 9999, "MOTIF1");
+     String name = seq.getName();
+     if (name.equals("FER1_SOLLC") || name.equals("Q93XJ9_SOLTU"))
+     {
+       assertEquals(1, features.size());
+       sf = features.get(0);
+       assertNull(sf.getFeatureGroup());
+       assertEquals(13, sf.getBegin());
+       assertEquals(15, sf.getEnd());
+       assertEquals("flxMotifP", sf.getDescription());
+     }
+     else if (name.equals("O80429_MAIZE"))
+     {
+       assertEquals(1, features.size());
+       sf = features.get(0);
+       assertNull(sf.getFeatureGroup());
+       assertEquals(107, sf.getBegin());
+       assertEquals(109, sf.getEnd());
+       assertEquals("flxMotifN", sf.getDescription());
+     }
+     else if (name.equals("FER1_PEA"))
+     {
+       assertEquals(1, features.size());
+       sf = features.get(0);
+       assertEquals("uniprot", sf.getFeatureGroup());
+       assertEquals(36, sf.getBegin());
+       assertEquals(38, sf.getEnd());
+       assertEquals("flxMotifG", sf.getDescription());
+     }
+     else
+     {
+       assertTrue("MOTIF1 features found for " + name, features.isEmpty());
+     }
+   }
+
+   /*
+    * feature type MOTIF2
+    * VTT motif should match FER1_PEA/26-28
+    * VRT should match nothing
+    */
+   for (SequenceI seq : al.getSequences())
+   {
+     List<SequenceFeature> features = seq.findFeatures(0, 9999, "MOTIF2");
+     String name = seq.getName();
+     if (name.equals("FER1_PEA"))
+     {
+       assertEquals(1, features.size());
+       sf = features.get(0);
+       assertEquals("uniprot", sf.getFeatureGroup());
+       assertEquals(26, sf.getBegin());
+       assertEquals(28, sf.getEnd());
+       assertEquals("vxtMotifT", sf.getDescription());
+       assertEquals(-3.21f, sf.getScore());
+     }
+     else
+     {
+       // no other sequence should have acquired a MOTIF2 feature
+       assertTrue("MOTIF2 features found for " + name, features.isEmpty());
+     }
+   }
+ }
}