import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignViewportI;
import jalview.api.FeatureColourI;
+import jalview.api.FeatureRenderer;
import jalview.api.FeaturesSourceI;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherSet;
+import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String TAB_REGEX = "\\t";
+
+ private static final String STARTGROUP = "STARTGROUP";
+
+ private static final String ENDGROUP = "ENDGROUP";
+
+ private static final String STARTFILTERS = "STARTFILTERS";
+
+ private static final String ENDFILTERS = "ENDFILTERS";
+
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
private static final String NOTE = "Note";
- protected static final String TAB = "\t";
-
protected static final String GFF_VERSION = "##gff-version";
- private static final Comparator<String> SORT_NULL_LAST = new Comparator<String>()
- {
- @Override
- public int compare(String o1, String o2)
- {
- if (o1 == null)
- {
- return o2 == null ? 0 : 1;
- }
- return (o2 == null ? -1 : o1.compareTo(o2));
- }
- };
-
private AlignmentI lastmatchedAl = null;
private SequenceIdMatcher matcher = null;
/**
* Constructor which does not parse the file immediately
*
- * @param inFile
+ * @param file File or String filename; passed on unchanged to the
+ *          superclass constructor
* @param paste
* @throws IOException
*/
- public FeaturesFile(String inFile, DataSourceType paste)
+ public FeaturesFile(Object file, DataSourceType paste)
throws IOException
{
- super(false, inFile, paste);
+ super(false, file, paste);
}
/**
* Constructor that optionally parses the file immediately
*
* @param parseImmediately
- * @param inFile
+ * @param file File or String filename, as for the constructor that does
+ *          not parse immediately; passed on unchanged to the superclass
+ *          constructor
* @param type
* @throws IOException
*/
- public FeaturesFile(boolean parseImmediately, String inFile,
- DataSourceType type)
- throws IOException
+ public FeaturesFile(boolean parseImmediately, Object file,
+ DataSourceType type) throws IOException
{
- super(parseImmediately, inFile, type);
+ super(parseImmediately, file, type);
}
/**
* @param align
* - alignment/dataset containing sequences that are to be annotated
* @param colours
- * - hashtable to store feature colour definitions
+ * - map to store feature colour definitions
* @param removeHTML
* - process html strings into plain text
* @param relaxedIdmatching
Map<String, FeatureColourI> colours, boolean removeHTML,
boolean relaxedIdmatching)
{
- Map<String, String> gffProps = new HashMap<String, String>();
+ return parse(align, colours, null, removeHTML, relaxedIdmatching);
+ }
+
+ /**
+ * Parse GFF or Jalview format sequence features file
+ *
+ * @param align
+ * - alignment/dataset containing sequences that are to be annotated
+ * @param colours
+ * - map to store feature colour definitions
+ * @param filters
+ * - map to store feature filter definitions
+ * @param removeHTML
+ * - process html strings into plain text
+ * @param relaxedIdmatching
+ * - when true, ID matches to compound sequence IDs are allowed
+ * @return true if features were added
+ */
+ public boolean parse(AlignmentI align,
+ Map<String, FeatureColourI> colours,
+ Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
+ boolean relaxedIdmatching)
+ {
+ Map<String, String> gffProps = new HashMap<>();
/*
* keep track of any sequences we try to create from the data
*/
- List<SequenceI> newseqs = new ArrayList<SequenceI>();
+ List<SequenceI> newseqs = new ArrayList<>();
String line = null;
try
continue;
}
- gffColumns = line.split("\\t"); // tab as regex
+ gffColumns = line.split(TAB_REGEX);
if (gffColumns.length == 1)
{
if (line.trim().equalsIgnoreCase("GFF"))
}
}
- if (gffColumns.length > 1 && gffColumns.length < 4)
+ if (gffColumns.length > 0 && gffColumns.length < 4)
{
/*
* if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
* a feature type colour specification
*/
String ft = gffColumns[0];
- if (ft.equalsIgnoreCase("startgroup"))
+ if (ft.equalsIgnoreCase(STARTFILTERS))
+ {
+ parseFilters(filters);
+ continue;
+ }
+ if (ft.equalsIgnoreCase(STARTGROUP))
{
featureGroup = gffColumns[1];
}
- else if (ft.equalsIgnoreCase("endgroup"))
+ else if (ft.equalsIgnoreCase(ENDGROUP))
{
// We should check whether this is the current group,
// but at present there's no way of showing more than 1 group
}
/**
+ * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
+ * filter to the map for each line parsed. After exit from this method,
+ * nextLine() should return the line after ENDFILTERS (or we are already at
+ * end of file if ENDFILTERS was missing).
+ * <p>
+ * Each filter line is expected to hold exactly two tab-separated tokens: the
+ * feature type and the filter definition. A line with any other token count
+ * is reported on stderr and skipped. A definition for which
+ * FeatureMatcherSet.fromString returns null is silently skipped.
+ *
+ * @param filters
+ *          map (keyed by feature type) to which parsed filters are added; may
+ *          be null, in which case the lines are still consumed but nothing is
+ *          stored
+ * @throws IOException
+ *           declared for read failures (presumably raised by nextLine() -
+ *           confirm against its declaration)
+ */
+ protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
+         throws IOException
+ {
+   String line;
+   while ((line = nextLine()) != null)
+   {
+     /*
+      * case-insensitive, locale-independent test for the ENDFILTERS prefix
+      * (toUpperCase() under e.g. a Turkish default locale does not map
+      * 'i' to 'I', so "endfilters" would not be recognised)
+      */
+     if (line.regionMatches(true, 0, ENDFILTERS, 0, ENDFILTERS.length()))
+     {
+       return;
+     }
+     String[] tokens = line.split(TAB_REGEX);
+     if (tokens.length != 2)
+     {
+       /*
+        * the offending line is a String so must be formatted with %s
+        * (%d would throw IllegalFormatConversionException here)
+        */
+       System.err.println(String.format("Invalid token count %d for %s",
+               tokens.length, line));
+     }
+     else
+     {
+       String featureType = tokens[0];
+       FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
+       if (fm != null && filters != null)
+       {
+         filters.put(featureType, fm);
+       }
+     }
+   }
+ }
+
+ /**
* Try to parse a Jalview format feature specification and add it as a
* sequence feature to any matching sequences in the alignment. Returns true
* if successful (a feature was added), or false if not.
*/
protected boolean parseJalviewFeature(String line, String[] gffColumns,
AlignmentI alignment, Map<String, FeatureColourI> featureColours,
- boolean removeHTML, boolean relaxedIdMatching, String featureGroup)
+ boolean removeHTML, boolean relaxedIdMatching,
+ String featureGroup)
{
/*
* tokens: description seqid seqIndex start end type [score]
* included (with no check on type or group).
*
* @param sequences
- * source of features
- * @param visible
- * map of colour for each visible feature type
- * @param visibleFeatureGroups
+ * @param fr
* @param includeNonPositional
* if true, include non-positional features (regardless of group or
* type)
* @return
*/
public String printJalviewFormat(SequenceI[] sequences,
- Map<String, FeatureColourI> visible,
- List<String> visibleFeatureGroups, boolean includeNonPositional)
+ FeatureRenderer fr, boolean includeNonPositional)
{
- if (!includeNonPositional && (visible == null || visible.isEmpty()))
+ Map<String, FeatureColourI> visibleColours = fr
+ .getDisplayedFeatureCols();
+ Map<String, FeatureMatcherSetI> featureFilters = fr.getFeatureFilters();
+
+ if (!includeNonPositional
+ && (visibleColours == null || visibleColours.isEmpty()))
{
// no point continuing.
return "No Features Visible";
*/
// TODO: decide if feature links should also be written here ?
StringBuilder out = new StringBuilder(256);
- if (visible != null)
+ if (visibleColours != null)
{
- for (Entry<String, FeatureColourI> featureColour : visible.entrySet())
+ for (Entry<String, FeatureColourI> featureColour : visibleColours
+ .entrySet())
{
FeatureColourI colour = featureColour.getValue();
out.append(colour.toJalviewFormat(featureColour.getKey())).append(
}
}
- String[] types = visible == null ? new String[0] : visible.keySet()
- .toArray(new String[visible.keySet().size()]);
+ String[] types = visibleColours == null ? new String[0]
+ : visibleColours.keySet()
+ .toArray(new String[visibleColours.keySet().size()]);
+
+ /*
+ * feature filters if any
+ */
+ outputFeatureFilters(out, visibleColours, featureFilters);
+
+ /*
+ * output features within groups
+ */
+ int count = outputFeaturesByGroup(out, fr, types, sequences,
+ includeNonPositional);
+
+ return count > 0 ? out.toString() : "No Features Visible";
+ }
+
+ /**
+ * Writes the filters of visible feature types to the output buffer, one
+ * line per feature type (type, tab, filter in its stable string form). The
+ * lines are preceded by a blank line and a STARTFILTERS line, and followed by
+ * an ENDFILTERS line. Nothing at all is written if no visible feature type
+ * has a filter.
+ *
+ * @param out
+ *          buffer to append to
+ * @param visible
+ *          map whose keys are the visible feature types; if null, nothing is
+ *          written
+ * @param featureFilters
+ *          filters keyed by feature type; if null or empty, nothing is
+ *          written
+ */
+ void outputFeatureFilters(StringBuilder out,
+         Map<String, FeatureColourI> visible,
+         Map<String, FeatureMatcherSetI> featureFilters)
+ {
+   boolean nothingToDo = visible == null || featureFilters == null
+           || featureFilters.isEmpty();
+   if (nothingToDo)
+   {
+     return;
+   }
+
+   boolean headerWritten = false;
+   for (String type : visible.keySet())
+   {
+     FeatureMatcherSetI typeFilter = featureFilters.get(type);
+     if (typeFilter == null)
+     {
+       continue;
+     }
+     if (!headerWritten)
+     {
+       /*
+        * header is deferred until the first filter is found
+        */
+       headerWritten = true;
+       out.append(newline);
+       out.append(STARTFILTERS);
+       out.append(newline);
+     }
+     out.append(type);
+     out.append(TAB);
+     out.append(typeFilter.toStableString());
+     out.append(newline);
+   }
+
+   if (headerWritten)
+   {
+     out.append(ENDFILTERS);
+     out.append(newline);
+   }
+ }
+
+ /**
+ * Appends output of visible sequence features within feature groups to the
+ * output buffer. Groups other than the null or empty group are sandwiched by
+ * STARTGROUP and ENDGROUP lines. Answers the number of features written.
+ * <p>
+ * Named groups are processed in sorted order, followed by the null group and
+ * then the empty group. A group for which no feature is written produces no
+ * output at all (no blank line, no STARTGROUP or ENDGROUP line).
+ *
+ * @param out
+ *          buffer to which the formatted features are appended
+ * @param fr
+ *          feature renderer; queried for all feature groups, for the
+ *          displayed feature groups, and for the visibility of each
+ *          positional feature
+ * @param featureTypes
+ *          feature types for which positional features are retrieved; if
+ *          empty, no positional features are output
+ * @param sequences
+ *          the sequences whose features are to be written
+ * @param includeNonPositional
+ *          if true, non-positional features in each group are also written,
+ *          without any visibility check
+ * @return the number of features appended to the output buffer
+ */
+ private int outputFeaturesByGroup(StringBuilder out,
+ FeatureRenderer fr, String[] featureTypes,
+ SequenceI[] sequences, boolean includeNonPositional)
+ {
+ List<String> featureGroups = fr.getFeatureGroups();
/*
* sort groups alphabetically, and ensure that features with a
* null or empty group are output after those in named groups
*/
- List<String> sortedGroups = new ArrayList<String>(visibleFeatureGroups);
+ List<String> sortedGroups = new ArrayList<>(featureGroups);
sortedGroups.remove(null);
sortedGroups.remove("");
Collections.sort(sortedGroups);
sortedGroups.add(null);
sortedGroups.add("");
- boolean foundSome = false;
+ int count = 0;
+ List<String> visibleGroups = fr.getDisplayedFeatureGroups();
/*
- * first output any non-positional features
+ * loop over all groups (may be visible or not);
+ * non-positional features are output even if group is not visible;
+ * the blank line and STARTGROUP line for a group are deferred until its
+ * first feature is written, and ENDGROUP is only written if STARTGROUP was
*/
- if (includeNonPositional)
+ for (String group : sortedGroups)
{
+ boolean firstInGroup = true;
+ boolean isNullGroup = group == null || "".equals(group);
+
for (int i = 0; i < sequences.length; i++)
{
String sequenceName = sequences[i].getName();
- for (SequenceFeature feature : sequences[i].getFeatures()
- .getNonPositionalFeatures())
+ List<SequenceFeature> features = new ArrayList<>();
+
+ /*
+ * get any non-positional features in this group, if wanted
+ * (for any feature type, whether visible or not)
+ */
+ if (includeNonPositional)
{
- foundSome = true;
- out.append(formatJalviewFeature(sequenceName, feature));
+ features.addAll(sequences[i].getFeatures()
+ .getFeaturesForGroup(false, group));
}
- }
- }
-
- for (String group : sortedGroups)
- {
- boolean isNamedGroup = (group != null && !"".equals(group));
- if (isNamedGroup)
- {
- out.append(newline);
- out.append("STARTGROUP").append(TAB);
- out.append(group);
- out.append(newline);
- }
- /*
- * output positional features within groups
- */
- for (int i = 0; i < sequences.length; i++)
- {
- String sequenceName = sequences[i].getName();
- List<SequenceFeature> features = new ArrayList<SequenceFeature>();
- if (types.length > 0)
+ /*
+ * add positional features for visible feature types, but
+ * (for named groups) only if feature group is visible;
+ * each positional feature is additionally checked with
+ * fr.isVisible() before being written
+ */
+ if (featureTypes.length > 0
+ && (isNullGroup || visibleGroups.contains(group)))
{
features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
- true, group, types));
+ true, group, featureTypes));
}
- for (SequenceFeature sequenceFeature : features)
+ for (SequenceFeature sf : features)
{
- foundSome = true;
- out.append(formatJalviewFeature(sequenceName, sequenceFeature));
+ if (sf.isNonPositional() || fr.isVisible(sf))
+ {
+ count++;
+ if (firstInGroup)
+ {
+ out.append(newline);
+ if (!isNullGroup)
+ {
+ out.append(STARTGROUP).append(TAB).append(group)
+ .append(newline);
+ }
+ }
+ firstInGroup = false;
+ out.append(formatJalviewFeature(sequenceName, sf));
+ }
}
}
- if (isNamedGroup)
+ if (!isNullGroup && !firstInGroup)
{
- out.append("ENDGROUP").append(TAB);
- out.append(group);
- out.append(newline);
+ out.append(ENDGROUP).append(TAB).append(group).append(newline);
}
}
-
- return foundSome ? out.toString() : "No Features Visible";
+ return count;
}
/**
dataset = new Alignment(new SequenceI[] {});
}
- Map<String, FeatureColourI> featureColours = new HashMap<String, FeatureColourI>();
+ Map<String, FeatureColourI> featureColours = new HashMap<>();
boolean parseResult = parse(dataset, featureColours, false, true);
if (!parseResult)
{
* @return
*/
public String printGffFormat(SequenceI[] sequences,
- Map<String, FeatureColourI> visible,
- List<String> visibleFeatureGroups,
- boolean includeNonPositionalFeatures)
+ FeatureRenderer fr, boolean includeNonPositionalFeatures)
{
+ Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
+
StringBuilder out = new StringBuilder(256);
out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
if (!includeNonPositionalFeatures
- && (visible == null || visible.isEmpty()))
+ && (visibleColours == null || visibleColours.isEmpty()))
{
return out.toString();
}
- String[] types = visible == null ? new String[0] : visible.keySet()
- .toArray(
- new String[visible.keySet().size()]);
+ String[] types = visibleColours == null ? new String[0]
+ : visibleColours.keySet()
+ .toArray(new String[visibleColours.keySet().size()]);
for (SequenceI seq : sequences)
{
- List<SequenceFeature> features = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> features = new ArrayList<>();
if (includeNonPositionalFeatures)
{
features.addAll(seq.getFeatures().getNonPositionalFeatures());
}
- if (visible != null && !visible.isEmpty())
+ if (visibleColours != null && !visibleColours.isEmpty())
{
features.addAll(seq.getFeatures().getPositionalFeatures(types));
}
for (SequenceFeature sf : features)
{
- String source = sf.featureGroup;
- if (!sf.isNonPositional() && source != null
- && !visibleFeatureGroups.contains(source))
+ if (!sf.isNonPositional() && !fr.isVisible(sf))
{
- // group is not visible
+ /*
+ * feature hidden by group visibility, colour threshold,
+ * or feature filter condition
+ */
continue;
}
+ String source = sf.featureGroup;
if (source == null)
{
source = sf.getDescription();
fromCount = Integer.parseInt(tokens[2]);
} catch (NumberFormatException nfe)
{
- throw new IOException("Invalid number in Align field: "
- + nfe.getMessage());
+ throw new IOException(
+ "Invalid number in Align field: " + nfe.getMessage());
}
/*
* @param newseqs
* @throws IOException
*/
- protected void processGffPragma(String line,
- Map<String, String> gffProps, AlignmentI align,
- List<SequenceI> newseqs) throws IOException
+ protected void processGffPragma(String line, Map<String, String> gffProps,
+ AlignmentI align, List<SequenceI> newseqs) throws IOException
{
line = line.trim();
if ("###".equals(line))