From: gmungoc Date: Tue, 21 Jan 2020 19:40:38 +0000 (+0000) Subject: JAL-3499 process motifs in features file to create features X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=refs%2Fheads%2Ffeature%2FJAL-3499featuresFileMotifs;p=jalview.git JAL-3499 process motifs in features file to create features --- diff --git a/src/jalview/analysis/Finder.java b/src/jalview/analysis/Finder.java index 3cbef6d..61ca663 100644 --- a/src/jalview/analysis/Finder.java +++ b/src/jalview/analysis/Finder.java @@ -53,11 +53,17 @@ public class Finder implements FinderI private Vector idMatches; /* - * the viewport to search over + * the viewport to search over, if known + * (may restrict search by selection group, or for hidden columns) */ private AlignViewportI viewport; /* + * the alignment to search over + */ + private AlignmentI alignment; + + /* * sequence index in alignment to search from */ private int sequenceIndex; @@ -76,6 +82,22 @@ public class Finder implements FinderI */ public Finder(AlignViewportI av) { + this(av, av.getAlignment()); + } + + /** + * Constructor given an alignment to search + * + * @param align + */ + public Finder(AlignmentI align) + { + this(null, align); + } + + private Finder(AlignViewportI av, AlignmentI align) + { + this.alignment = align; this.viewport = av; this.sequenceIndex = 0; this.columnIndex = -1; @@ -135,13 +157,13 @@ public class Finder implements FinderI searchResults = new SearchResults(); idMatches = new Vector<>(); - SequenceGroup selection = viewport.getSelectionGroup(); + SequenceGroup selection = viewport == null ? null + : viewport.getSelectionGroup(); if (selection != null && selection.getSize() < 1) { selection = null; // ? 
ignore column-only selection } - AlignmentI alignment = viewport.getAlignment(); int end = alignment.getHeight(); while (sequenceIndex < end) @@ -179,7 +201,6 @@ public class Finder implements FinderI * restrict search to (next) visible column region, * in case there are hidden columns */ - AlignmentI alignment = viewport.getAlignment(); VisibleContigsIterator visibleRegions = alignment.getHiddenColumns() .getVisContigsIterator(column, alignment.getWidth(), false); @@ -195,7 +216,8 @@ public class Finder implements FinderI /* * restrict search to selected region if there is one */ - SequenceGroup selection = viewport.getSelectionGroup(); + SequenceGroup selection = viewport == null ? null + : viewport.getSelectionGroup(); if (selection != null) { int selectionStart = selection.getStartRes(); @@ -230,7 +252,8 @@ public class Finder implements FinderI protected boolean findNextMatch(SequenceI seq, String searchString, Regex searchPattern, boolean matchDescription) { - SequenceGroup selection = viewport.getSelectionGroup(); + SequenceGroup selection = viewport == null ? 
null + : viewport.getSelectionGroup(); if (selection != null && !selection.contains(seq)) { /* diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index a69788b..df3d352 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -21,15 +21,18 @@ package jalview.io; import jalview.analysis.AlignmentUtils; +import jalview.analysis.Finder; import jalview.analysis.SequenceIdMatcher; import jalview.api.AlignViewportI; import jalview.api.FeatureColourI; import jalview.api.FeatureRenderer; import jalview.api.FeaturesSourceI; +import jalview.api.FinderI; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; import jalview.datamodel.MappedFeatures; +import jalview.datamodel.SearchResultMatchI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; @@ -85,6 +88,10 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI private static final String ENDFILTERS = "ENDFILTERS"; + private static final String STARTMOTIFS = "STARTMOTIFS"; + + private static final String ENDMOTIFS = "ENDMOTIFS"; + private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED"; private static final String NOTE = "Note"; @@ -232,6 +239,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI while ((line = nextLine()) != null) { + line = line.trim(); // skip comments/process pragmas if (line.length() == 0 || line.startsWith("#")) { @@ -259,8 +267,9 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI if (gffColumns.length > 0 && gffColumns.length < 4) { /* - * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or - * a feature type colour specification + * if 2 or 3 tokens, we anticipate one of either + * STARTGROUP ENDGROUP STARTFILTERS STARTMOTIFS + * or a feature type and colour specification */ String ft = gffColumns[0]; if 
(ft.equalsIgnoreCase(STARTFILTERS)) @@ -268,14 +277,17 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI parseFilters(filters); continue; } + if (ft.equalsIgnoreCase(STARTMOTIFS)) + { + parseMotifs(align, featureGroup); + continue; + } if (ft.equalsIgnoreCase(STARTGROUP)) { featureGroup = gffColumns[1]; } else if (ft.equalsIgnoreCase(ENDGROUP)) { - // We should check whether this is the current group, - // but at present there's no way of showing more than 1 group featureGroup = null; } else @@ -292,8 +304,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /* - * if not a comment, GFF pragma, startgroup, endgroup or feature - * colour specification, that just leaves a feature details line + * if not handled above, that just leaves a feature details line * in either Jalview or GFF format */ if (gffVersion == 0) @@ -333,6 +344,62 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** + * Reads lines up to and including the next ENDMOTIFS, and processes each one + * by searching the alignment for matches to the motif, and adding a feature + * (of the given type, description and optional score) for each match found + * + * @param alignment + * @param featureGroup + * @throws IOException + */ + protected void parseMotifs(AlignmentI alignment, String featureGroup) + throws IOException + { + FinderI finder = new Finder(alignment); + String line; + while ((line = nextLine()) != null) + { + if (line.toUpperCase().startsWith(ENDMOTIFS)) + { + return; + } + String[] tokens = line.split(TAB_REGEX); + if (tokens.length != 3 && tokens.length != 4) + { + System.err.println(String.format("Invalid token count %d for %s", + tokens.length, line)); + } + String motif = tokens[0]; + String featureType = tokens[1]; + String description = tokens[2]; + float score = 0f; + if (tokens.length > 3) + { + try + { + score = Float.valueOf(tokens[3]); + } catch (NumberFormatException e) + { + System.err.println("Invalid score in " + line); + } + } + finder.findAll(motif, true, false); + List matches = finder.getSearchResults() + .getResults(); + for
(SearchResultMatchI match : matches) + { + SequenceFeature sf = new SequenceFeature(featureType, description, + match.getStart(), match.getEnd(), score, featureGroup); + match.getSequence().addSequenceFeature(sf); + } + } + } + + /** * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type * filter to the map for each line parsed. After exit from this method, * nextLine() should return the line after ENDFILTERS (or we are already at @@ -354,7 +421,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI String[] tokens = line.split(TAB_REGEX); if (tokens.length != 2) { - System.err.println(String.format("Invalid token count %d for %d", + System.err.println(String.format("Invalid token count %d for %s", tokens.length, line)); } else diff --git a/test/jalview/io/FeaturesFileTest.java b/test/jalview/io/FeaturesFileTest.java index 090de6f..5c7bf15 100644 --- a/test/jalview/io/FeaturesFileTest.java +++ b/test/jalview/io/FeaturesFileTest.java @@ -23,6 +23,7 @@ package jalview.io; import static org.testng.AssertJUnit.assertEquals; import static org.testng.AssertJUnit.assertFalse; import static org.testng.AssertJUnit.assertNotNull; +import static org.testng.AssertJUnit.assertNull; import static org.testng.AssertJUnit.assertSame; import static org.testng.AssertJUnit.assertTrue; import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals; @@ -62,6 +63,7 @@ import org.testng.annotations.Test; public class FeaturesFileTest { + private static final String TAB = "\t"; private static String simpleGffFile = "examples/testdata/simpleGff3.gff"; @AfterClass(alwaysRun = true) @@ -899,4 +901,145 @@ public class FeaturesFileTest + "ENDGROUP\tgrp2\n"; assertEquals(expected, exported); } + + /** + * Test parsing a features file with Jalview format features, including + * STARTMOTIFS/ENDMOTIFS lines with motifs to be matched to create features + * + * @throws Exception + */ + @Test(groups = { "Functional" }) + public void 
testParse_jalviewFeaturesWithMotifs() throws IOException + { + File f = new File("examples/uniref50.fa"); + AlignmentI al = readAlignmentFile(f); + AlignFrame af = new AlignFrame(al, 500, 500); + Map colours = af.getFeatureRenderer() + .getFeatureColours(); + + /* + * hide columns with YKV motif; these should not get + * matched by the Finder + */ + al.getHiddenColumns().hideColumns(62, 64); + + // @formatter:off + String featureData = + /* + * features in the null group + */ + "HELIX" + TAB + "blue\n" + + "MOTIF1" + TAB + "green\n" + + "MOTIF2" + TAB + "250,200,150|100,50,0|-3.9|4.5|above|-2.0\n" + + "adescription" + TAB + "FER_CAPAN" + TAB + "-1" + TAB + "42" + TAB + "45" + TAB + "HELIX\n" + + "STARTMOTIFS\n" + + "FLP" + TAB + "MOTIF1" + TAB + "flxMotifP\n" + + "F[LR]N" + TAB + "MOTIF1" + TAB + "flxMotifN\n" + + "fld" + TAB + "MOTIF1" + TAB + "flxMotifD\n" + + "YKV" + TAB + "MOTIF1" + TAB + "ykvMotif\n" + + "ENDMOTIFS\n" + + /* + * features in group uniprot + */ + "STARTGROUP" + TAB + "uniprot\n" + + "bdescription" + TAB + "FER_CAPAN" + TAB + "-1" + TAB + "47" + TAB + "48" + TAB + "HELIX\n" + + "STARTMOTIFS\n" + + "FLG" + TAB + "MOTIF1" + TAB + "flxMotifG\n" + + "VTT" + TAB + "MOTIF2" + TAB + "vxtMotifT" + TAB + "-3.21\n" + + "VRT" + TAB + "MOTIF2" + TAB + "vxtMotifR\n" + + "ENDMOTIFS\n" + + "ENDGROUP"; + // @formatter:on + FeaturesFile featuresFile = new FeaturesFile(featureData, + DataSourceType.PASTE); + assertTrue("Failed to parse features file", + featuresFile.parse(al, colours, true)); + + // verify HELIX features were parsed as normal + List sfs = al.getSequenceAt(1).findFeatures(0, 999, + "HELIX"); + assertEquals(2, sfs.size()); + SequenceFeature sf = sfs.get(0); + assertNull(sf.getFeatureGroup()); + assertEquals(42, sf.getBegin()); + assertEquals(45, sf.getEnd()); + assertEquals("adescription", sf.getDescription()); + sf = sfs.get(1); + assertEquals("uniprot", sf.getFeatureGroup()); + assertEquals(47, sf.getBegin()); + assertEquals(48, sf.getEnd()); +
assertEquals("bdescription", sf.getDescription()); + + /* + * feature type MOTIF1 + * FLP motif should match FER1_SOLLC/13-15 and Q93XJ9_SOLTU/13-15 + * F[LR]N should match O80429_MAIZE/107-109 + * fld should match nothing (as case sensitive) + * feature group should be null for the above + * FLG should match FER1_PEA/36-38, feature group uniprot + * YKV should match nothing as entirely within hidden columns + */ + for (SequenceI seq : al.getSequences()) + { + List features = seq.findFeatures(0, 9999, "MOTIF1"); + String name = seq.getName(); + if (name.equals("FER1_SOLLC") || name.equals("Q93XJ9_SOLTU")) + { + assertEquals(1, features.size()); + sf = features.get(0); + assertNull(sf.getFeatureGroup()); + assertEquals(13, sf.getBegin()); + assertEquals(15, sf.getEnd()); + assertEquals("flxMotifP", sf.getDescription()); + } + else if (name.equals("O80429_MAIZE")) + { + assertEquals(1, features.size()); + sf = features.get(0); + assertNull(sf.getFeatureGroup()); + assertEquals(107, sf.getBegin()); + assertEquals(109, sf.getEnd()); + assertEquals("flxMotifN", sf.getDescription()); + } + else if (name.equals("FER1_PEA")) + { + assertEquals(1, features.size()); + sf = features.get(0); + assertEquals("uniprot", sf.getFeatureGroup()); + assertEquals(36, sf.getBegin()); + assertEquals(38, sf.getEnd()); + assertEquals("flxMotifG", sf.getDescription()); + } + else + { + assertTrue("MOTIF1 features found for " + name, features.isEmpty()); + } + } + + /* + * feature type MOTIF2 + * VTT motif should match FER1_PEA/26-28 + * VRT should match nothing + */ + for (SequenceI seq : al.getSequences()) + { + List features = seq.findFeatures(0, 9999, "MOTIF2"); + String name = seq.getName(); + if (name.equals("FER1_PEA")) + { + assertEquals(1, features.size()); + sf = features.get(0); + assertEquals("uniprot", sf.getFeatureGroup()); + assertEquals(26, sf.getBegin()); + assertEquals(28, sf.getEnd()); + assertEquals("vxtMotifT", sf.getDescription()); + assertEquals(-3.21f, sf.getScore()); 
+ } + else + { + assertTrue("MOTIF2 features found for " + name, features.isEmpty()); + assertTrue(features.isEmpty()); + } + } + } }