import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherSet;
+import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String TAB_REGEX = "\\t";
+
+ private static final String STARTGROUP = "STARTGROUP";
+
+ private static final String ENDGROUP = "ENDGROUP";
+
+ private static final String STARTFILTERS = "STARTFILTERS";
+
+ private static final String ENDFILTERS = "ENDFILTERS";
+
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
private static final String NOTE = "Note";
* @param align
* - alignment/dataset containing sequences that are to be annotated
* @param colours
- * - hashtable to store feature colour definitions
+ * - map to store feature colour definitions
* @param removeHTML
* - process html strings into plain text
* @param relaxedIdmatching
Map<String, FeatureColourI> colours, boolean removeHTML,
boolean relaxedIdmatching)
{
- Map<String, String> gffProps = new HashMap<String, String>();
+ return parse(align, colours, null, removeHTML, relaxedIdmatching);
+ }
+
+ /**
+ * Parse GFF or Jalview format sequence features file
+ *
+ * @param align
+ * - alignment/dataset containing sequences that are to be annotated
+ * @param colours
+ * - map to store feature colour definitions
+ * @param filters
+ * - map to store feature filter definitions
+ * @param removeHTML
+ * - process html strings into plain text
+ * @param relaxedIdmatching
+ * - when true, ID matches to compound sequence IDs are allowed
+ * @return true if features were added
+ */
+ public boolean parse(AlignmentI align,
+ Map<String, FeatureColourI> colours,
+ Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
+ boolean relaxedIdmatching)
+ {
+ Map<String, String> gffProps = new HashMap<>();
/*
* keep track of any sequences we try to create from the data
*/
- List<SequenceI> newseqs = new ArrayList<SequenceI>();
+ List<SequenceI> newseqs = new ArrayList<>();
String line = null;
try
continue;
}
- gffColumns = line.split("\\t"); // tab as regex
+ gffColumns = line.split(TAB_REGEX);
if (gffColumns.length == 1)
{
if (line.trim().equalsIgnoreCase("GFF"))
}
}
- if (gffColumns.length > 1 && gffColumns.length < 4)
+ if (gffColumns.length > 0 && gffColumns.length < 4)
{
/*
* if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
* a feature type colour specification
*/
String ft = gffColumns[0];
- if (ft.equalsIgnoreCase("startgroup"))
+ if (ft.equalsIgnoreCase(STARTFILTERS))
+ {
+ parseFilters(filters);
+ continue;
+ }
+ if (ft.equalsIgnoreCase(STARTGROUP))
{
featureGroup = gffColumns[1];
}
- else if (ft.equalsIgnoreCase("endgroup"))
+ else if (ft.equalsIgnoreCase(ENDGROUP))
{
// We should check whether this is the current group,
// but at present there's no way of showing more than 1 group
}
/**
+ * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
+ * filter to the map for each line parsed. After exit from this method,
+ * nextLine() should return the line after ENDFILTERS (or we are already at
+ * end of file if ENDFILTERS was missing).
+ *
+ * @param filters
+ * @throws IOException
+ */
+ protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
+ throws IOException
+ {
+ String line;
+ while ((line = nextLine()) != null)
+ {
+ if (line.toUpperCase().startsWith(ENDFILTERS))
+ {
+ return;
+ }
+ String[] tokens = line.split(TAB_REGEX);
+ if (tokens.length != 2)
+ {
+ System.err.println(String.format("Invalid token count %d for %d",
+ tokens.length, line));
+ }
+ else
+ {
+ String featureType = tokens[0];
+ FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
+ if (fm != null && filters != null)
+ {
+ filters.put(featureType, fm);
+ }
+ }
+ }
+ }
+
+ /**
* Try to parse a Jalview format feature specification and add it as a
* sequence feature to any matching sequences in the alignment. Returns true
* if successful (a feature was added), or false if not.
}
/**
- * Returns contents of a Jalview format features file, for visible features,
- * as filtered by type and group. Features with a null group are displayed if
- * their feature type is visible. Non-positional features may optionally be
- * included (with no check on type or group).
+ * Returns contents of a Jalview format features file, for visible features, as
+ * filtered by type and group. Features with a null group are displayed if their
+ * feature type is visible. Non-positional features may optionally be included
+ * (with no check on type or group).
*
* @param sequences
* source of features
* @param visible
* map of colour for each visible feature type
+ * @param featureFilters
* @param visibleFeatureGroups
* @param includeNonPositional
* if true, include non-positional features (regardless of group or
*/
public String printJalviewFormat(SequenceI[] sequences,
Map<String, FeatureColourI> visible,
+ Map<String, FeatureMatcherSetI> featureFilters,
List<String> visibleFeatureGroups, boolean includeNonPositional)
{
if (!includeNonPositional && (visible == null || visible.isEmpty()))
.toArray(new String[visible.keySet().size()]);
/*
+ * feature filters if any
+ */
+ outputFeatureFilters(out, visible, featureFilters);
+
+ /*
* sort groups alphabetically, and ensure that features with a
* null or empty group are output after those in named groups
*/
- List<String> sortedGroups = new ArrayList<String>(visibleFeatureGroups);
+ List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);
sortedGroups.remove(null);
sortedGroups.remove("");
Collections.sort(sortedGroups);
}
}
- for (String group : sortedGroups)
+ /*
+ * positional features within groups
+ */
+ foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences);
+
+ return foundSome ? out.toString() : "No Features Visible";
+ }
+
+ /**
+ * Outputs any feature filters defined for visible feature types, sandwiched by
+ * STARTFILTERS and ENDFILTERS lines
+ *
+ * @param out
+ * @param visible
+ * @param featureFilters
+ */
+ void outputFeatureFilters(StringBuilder out,
+ Map<String, FeatureColourI> visible,
+ Map<String, FeatureMatcherSetI> featureFilters)
+ {
+ if (visible == null || featureFilters == null
+ || featureFilters.isEmpty())
+ {
+ return;
+ }
+
+ boolean first = true;
+ for (String featureType : visible.keySet())
+ {
+ FeatureMatcherSetI filter = featureFilters.get(featureType);
+ if (filter != null)
+ {
+ if (first)
+ {
+ first = false;
+ out.append(newline).append(STARTFILTERS).append(newline);
+ }
+ out.append(featureType).append(TAB).append(filter.toStableString())
+ .append(newline);
+ }
+ }
+ if (!first)
+ {
+ out.append(ENDFILTERS).append(newline).append(newline);
+ }
+
+ }
+
+ /**
+ * Appends output of sequence features within feature groups to the output
+ * buffer. Groups other than the null or empty group are sandwiched by
+ * STARTGROUP and ENDGROUP lines.
+ *
+ * @param out
+ * @param groups
+ * @param featureTypes
+ * @param sequences
+ * @return
+ */
+ private boolean outputFeaturesByGroup(StringBuilder out,
+ List<String> groups, String[] featureTypes, SequenceI[] sequences)
+ {
+ boolean foundSome = false;
+ for (String group : groups)
{
boolean isNamedGroup = (group != null && !"".equals(group));
if (isNamedGroup)
{
out.append(newline);
- out.append("STARTGROUP").append(TAB);
+ out.append(STARTGROUP).append(TAB);
out.append(group);
out.append(newline);
}
for (int i = 0; i < sequences.length; i++)
{
String sequenceName = sequences[i].getName();
- List<SequenceFeature> features = new ArrayList<SequenceFeature>();
- if (types.length > 0)
+ List<SequenceFeature> features = new ArrayList<>();
+ if (featureTypes.length > 0)
{
features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
- true, group, types));
+ true, group, featureTypes));
}
for (SequenceFeature sequenceFeature : features)
if (isNamedGroup)
{
- out.append("ENDGROUP").append(TAB);
+ out.append(ENDGROUP).append(TAB);
out.append(group);
out.append(newline);
}
}
-
- return foundSome ? out.toString() : "No Features Visible";
+ return foundSome;
}
/**
dataset = new Alignment(new SequenceI[] {});
}
- Map<String, FeatureColourI> featureColours = new HashMap<String, FeatureColourI>();
+ Map<String, FeatureColourI> featureColours = new HashMap<>();
boolean parseResult = parse(dataset, featureColours, false, true);
if (!parseResult)
{
for (SequenceI seq : sequences)
{
- List<SequenceFeature> features = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> features = new ArrayList<>();
if (includeNonPositionalFeatures)
{
features.addAll(seq.getFeatures().getNonPositionalFeatures());
import static org.testng.AssertJUnit.assertEquals;
import static org.testng.AssertJUnit.assertFalse;
import static org.testng.AssertJUnit.assertNotNull;
+import static org.testng.AssertJUnit.assertSame;
import static org.testng.AssertJUnit.assertTrue;
+import static org.testng.internal.junit.ArrayAsserts.assertArrayEquals;
import jalview.api.FeatureColourI;
import jalview.api.FeatureRenderer;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherI;
+import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.datamodel.features.SequenceFeatures;
import jalview.gui.AlignFrame;
import jalview.gui.Desktop;
import jalview.gui.JvOptionPane;
import jalview.structure.StructureSelectionManager;
+import jalview.util.matcher.Condition;
import java.awt.Color;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
*/
FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
Map<String, FeatureColourI> visible = fr.getDisplayedFeatureCols();
- List<String> visibleGroups = new ArrayList<String>(
+ List<String> visibleGroups = new ArrayList<>(
Arrays.asList(new String[] {}));
String exported = featuresFile.printJalviewFormat(
- al.getSequencesArray(), visible, visibleGroups, false);
+ al.getSequencesArray(), visible, null, visibleGroups, false);
String expected = "No Features Visible";
assertEquals(expected, exported);
*/
visibleGroups.add("uniprot");
exported = featuresFile.printJalviewFormat(al.getSequencesArray(),
- visible, visibleGroups, true);
+ visible, null, visibleGroups, true);
expected = "Cath\tFER_CAPAA\t-1\t0\t0\tDomain\t0.0\n"
+ "desc1\tFER_CAPAN\t-1\t0\t0\tPfam\t1.3\n"
+ "desc3\tFER1_SOLLC\t-1\t0\t0\tPfam\n" // NaN is not output
fr.setVisible("GAMMA-TURN");
visible = fr.getDisplayedFeatureCols();
exported = featuresFile.printJalviewFormat(al.getSequencesArray(),
- visible, visibleGroups, false);
+ visible, null, visibleGroups, false);
expected = "METAL\tcc9900\n"
+ "GAMMA-TURN\tff0000|00ffff|20.0|95.0|below|66.0\n"
+ "\nSTARTGROUP\tuniprot\n"
fr.setVisible("Pfam");
visible = fr.getDisplayedFeatureCols();
exported = featuresFile.printJalviewFormat(al.getSequencesArray(),
- visible, visibleGroups, false);
+ visible, null, visibleGroups, false);
/*
* features are output within group, ordered by sequence and by type
*/
*/
FeaturesFile featuresFile = new FeaturesFile();
FeatureRenderer fr = af.alignPanel.getFeatureRenderer();
- Map<String, FeatureColourI> visible = new HashMap<String, FeatureColourI>();
- List<String> visibleGroups = new ArrayList<String>(
+ Map<String, FeatureColourI> visible = new HashMap<>();
+ List<String> visibleGroups = new ArrayList<>(
Arrays.asList(new String[] {}));
String exported = featuresFile.printGffFormat(al.getSequencesArray(),
visible, visibleGroups, false);
+ "FER_CAPAN\tUniprot\tPfam\t20\t20\t0.0\t+\t2\tx=y;black=white\n";
assertEquals(expected, exported);
}
+
+ /**
+ * Test for parsing of feature filters as represented in a Jalview features
+ * file
+ *
+ * @throws Exception
+ */
+ @Test(groups = { "Functional" })
+ public void testParseFilters() throws Exception
+ {
+ Map<String, FeatureMatcherSetI> filters = new HashMap<>();
+ String text = "sequence_variant\tCSQ:PolyPhen NotContains 'damaging'\n"
+ + "missense_variant\t(label contains foobar) and (Score lt 1.3)";
+ FeaturesFile featuresFile = new FeaturesFile(text,
+ DataSourceType.PASTE);
+ featuresFile.parseFilters(filters);
+ assertEquals(filters.size(), 2);
+
+ FeatureMatcherSetI fm = filters.get("sequence_variant");
+ assertNotNull(fm);
+ Iterator<FeatureMatcherI> matchers = fm.getMatchers().iterator();
+ FeatureMatcherI matcher = matchers.next();
+ assertFalse(matchers.hasNext());
+ String[] attributes = matcher.getAttribute();
+ assertArrayEquals(attributes, new String[] { "CSQ", "PolyPhen" });
+ assertSame(matcher.getMatcher().getCondition(), Condition.NotContains);
+ assertEquals(matcher.getMatcher().getPattern(), "damaging");
+
+ fm = filters.get("missense_variant");
+ assertNotNull(fm);
+ matchers = fm.getMatchers().iterator();
+ matcher = matchers.next();
+ assertTrue(matcher.isByLabel());
+ assertSame(matcher.getMatcher().getCondition(), Condition.Contains);
+ assertEquals(matcher.getMatcher().getPattern(), "foobar");
+ matcher = matchers.next();
+ assertTrue(matcher.isByScore());
+ assertSame(matcher.getMatcher().getCondition(), Condition.LT);
+ assertEquals(matcher.getMatcher().getPattern(), "1.3");
+ assertEquals(matcher.getMatcher().getFloatValue(), 1.3f);
+
+ assertFalse(matchers.hasNext());
+ }
}