import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
+import jalview.datamodel.features.FeatureMatcherSet;
+import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
-import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String TAB_REGEX = "\\t";
+
+ private static final String STARTGROUP = "STARTGROUP";
+
+ private static final String ENDGROUP = "ENDGROUP";
+
+ private static final String STARTFILTERS = "STARTFILTERS";
+
+ private static final String ENDFILTERS = "ENDFILTERS";
+
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
private static final String NOTE = "Note";
- protected static final String TAB = "\t";
-
protected static final String GFF_VERSION = "##gff-version";
- private static final Comparator<String> SORT_NULL_LAST = new Comparator<String>()
- {
- @Override
- public int compare(String o1, String o2)
- {
- if (o1 == null)
- {
- return o2 == null ? 0 : 1;
- }
- return (o2 == null ? -1 : o1.compareTo(o2));
- }
- };
-
private AlignmentI lastmatchedAl = null;
private SequenceIdMatcher matcher = null;
/**
* Constructor which does not parse the file immediately
*
- * @param inFile
+ * @param file
* @param paste
* @throws IOException
*/
- public FeaturesFile(String inFile, DataSourceType paste)
+ public FeaturesFile(String file, DataSourceType paste)
throws IOException
{
- super(false, inFile, paste);
+ super(false, file, paste);
}
/**
* Constructor that optionally parses the file immediately
*
* @param parseImmediately
- * @param inFile
+ * @param file
* @param type
* @throws IOException
*/
- public FeaturesFile(boolean parseImmediately, String inFile,
- DataSourceType type)
- throws IOException
+ public FeaturesFile(boolean parseImmediately, String file,
+ DataSourceType type) throws IOException
{
- super(parseImmediately, inFile, type);
+ super(parseImmediately, file, type);
}
/**
* @param align
* - alignment/dataset containing sequences that are to be annotated
* @param colours
- * - hashtable to store feature colour definitions
+ * - map to store feature colour definitions
* @param removeHTML
* - process html strings into plain text
* @param relaxedIdmatching
Map<String, FeatureColourI> colours, boolean removeHTML,
boolean relaxedIdmatching)
{
- Map<String, String> gffProps = new HashMap<String, String>();
+ return parse(align, colours, null, removeHTML, relaxedIdmatching);
+ }
+
+ /**
+ * Parse GFF or Jalview format sequence features file
+ *
+ * @param align
+ * - alignment/dataset containing sequences that are to be annotated
+ * @param colours
+ * - map to store feature colour definitions
+ * @param filters
+ * - map to store feature filter definitions
+ * @param removeHTML
+ * - process html strings into plain text
+ * @param relaxedIdmatching
+ * - when true, ID matches to compound sequence IDs are allowed
+ * @return true if features were added
+ */
+ public boolean parse(AlignmentI align,
+ Map<String, FeatureColourI> colours,
+ Map<String, FeatureMatcherSetI> filters, boolean removeHTML,
+ boolean relaxedIdmatching)
+ {
+ Map<String, String> gffProps = new HashMap<>();
/*
* keep track of any sequences we try to create from the data
*/
- List<SequenceI> newseqs = new ArrayList<SequenceI>();
+ List<SequenceI> newseqs = new ArrayList<>();
String line = null;
try
continue;
}
- gffColumns = line.split("\\t"); // tab as regex
+ gffColumns = line.split(TAB_REGEX);
if (gffColumns.length == 1)
{
if (line.trim().equalsIgnoreCase("GFF"))
}
}
- if (gffColumns.length > 1 && gffColumns.length < 4)
+ if (gffColumns.length > 0 && gffColumns.length < 4)
{
/*
* if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or
* a feature type colour specification
*/
String ft = gffColumns[0];
- if (ft.equalsIgnoreCase("startgroup"))
+ if (ft.equalsIgnoreCase(STARTFILTERS))
+ {
+ parseFilters(filters);
+ continue;
+ }
+ if (ft.equalsIgnoreCase(STARTGROUP))
{
featureGroup = gffColumns[1];
}
- else if (ft.equalsIgnoreCase("endgroup"))
+ else if (ft.equalsIgnoreCase(ENDGROUP))
{
// We should check whether this is the current group,
// but at present there's no way of showing more than 1 group
}
/**
+ * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type
+ * filter to the map for each line parsed. After exit from this method,
+ * nextLine() should return the line after ENDFILTERS (or we are already at
+ * end of file if ENDFILTERS was missing).
+ *
+ * @param filters
+ * @throws IOException
+ */
+ protected void parseFilters(Map<String, FeatureMatcherSetI> filters)
+ throws IOException
+ {
+ String line;
+ while ((line = nextLine()) != null)
+ {
+ if (line.toUpperCase().startsWith(ENDFILTERS))
+ {
+ return;
+ }
+ String[] tokens = line.split(TAB_REGEX);
+ if (tokens.length != 2)
+ {
+ System.err.println(String.format("Invalid token count %d for %d",
+ tokens.length, line));
+ }
+ else
+ {
+ String featureType = tokens[0];
+ FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]);
+ if (fm != null && filters != null)
+ {
+ filters.put(featureType, fm);
+ }
+ }
+ }
+ }
+
+ /**
* Try to parse a Jalview format feature specification and add it as a
* sequence feature to any matching sequences in the alignment. Returns true
* if successful (a feature was added), or false if not.
*/
protected boolean parseJalviewFeature(String line, String[] gffColumns,
AlignmentI alignment, Map<String, FeatureColourI> featureColours,
- boolean removeHTML, boolean relaxedIdMatching, String featureGroup)
+ boolean removeHTML, boolean relaxedIdMatching,
+ String featureGroup)
{
/*
* tokens: description seqid seqIndex start end type [score]
Color colour = ColorUtils.createColourFromName(ft);
featureColours.put(ft, new FeatureColour(colour));
}
- SequenceFeature sf = new SequenceFeature(ft, desc, "", startPos,
- endPos, featureGroup);
+ SequenceFeature sf = null;
if (gffColumns.length > 6)
{
float score = Float.NaN;
try
{
score = new Float(gffColumns[6]).floatValue();
- // update colourgradient bounds if allowed to
} catch (NumberFormatException ex)
{
- // leave as NaN
+ sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
}
- sf.setScore(score);
+ sf = new SequenceFeature(ft, desc, startPos, endPos, score,
+ featureGroup);
+ }
+ else
+ {
+ sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
}
parseDescriptionHTML(sf, removeHTML);
}
/**
- * Returns contents of a Jalview format features file, for visible features,
- * as filtered by type and group. Features with a null group are displayed if
- * their feature type is visible. Non-positional features may optionally be
- * included (with no check on type or group).
+ * Returns contents of a Jalview format features file, for visible features, as
+ * filtered by type and group. Features with a null group are displayed if their
+ * feature type is visible. Non-positional features may optionally be included
+ * (with no check on type or group).
*
* @param sequences
* source of features
* @param visible
* map of colour for each visible feature type
+ * @param featureFilters
* @param visibleFeatureGroups
* @param includeNonPositional
* if true, include non-positional features (regardless of group or
*/
public String printJalviewFormat(SequenceI[] sequences,
Map<String, FeatureColourI> visible,
+ Map<String, FeatureMatcherSetI> featureFilters,
List<String> visibleFeatureGroups, boolean includeNonPositional)
{
if (!includeNonPositional && (visible == null || visible.isEmpty()))
.toArray(new String[visible.keySet().size()]);
/*
+ * feature filters if any
+ */
+ outputFeatureFilters(out, visible, featureFilters);
+
+ /*
* sort groups alphabetically, and ensure that features with a
* null or empty group are output after those in named groups
*/
- List<String> sortedGroups = new ArrayList<String>(visibleFeatureGroups);
+ List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);
sortedGroups.remove(null);
sortedGroups.remove("");
Collections.sort(sortedGroups);
}
}
- for (String group : sortedGroups)
+ /*
+ * positional features within groups
+ */
+ foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences);
+
+ return foundSome ? out.toString() : "No Features Visible";
+ }
+
+ /**
+ * Outputs any feature filters defined for visible feature types, sandwiched by
+ * STARTFILTERS and ENDFILTERS lines
+ *
+ * @param out
+ * @param visible
+ * @param featureFilters
+ */
+ void outputFeatureFilters(StringBuilder out,
+ Map<String, FeatureColourI> visible,
+ Map<String, FeatureMatcherSetI> featureFilters)
+ {
+ if (visible == null || featureFilters == null
+ || featureFilters.isEmpty())
+ {
+ return;
+ }
+
+ boolean first = true;
+ for (String featureType : visible.keySet())
+ {
+ FeatureMatcherSetI filter = featureFilters.get(featureType);
+ if (filter != null)
+ {
+ if (first)
+ {
+ first = false;
+ out.append(newline).append(STARTFILTERS).append(newline);
+ }
+ out.append(featureType).append(TAB).append(filter.toStableString())
+ .append(newline);
+ }
+ }
+ if (!first)
+ {
+ out.append(ENDFILTERS).append(newline).append(newline);
+ }
+
+ }
+
+ /**
+ * Appends output of sequence features within feature groups to the output
+ * buffer. Groups other than the null or empty group are sandwiched by
+ * STARTGROUP and ENDGROUP lines.
+ *
+ * @param out
+ * @param groups
+ * @param featureTypes
+ * @param sequences
+ * @return
+ */
+ private boolean outputFeaturesByGroup(StringBuilder out,
+ List<String> groups, String[] featureTypes, SequenceI[] sequences)
+ {
+ boolean foundSome = false;
+ for (String group : groups)
{
boolean isNamedGroup = (group != null && !"".equals(group));
if (isNamedGroup)
{
out.append(newline);
- out.append("STARTGROUP").append(TAB);
+ out.append(STARTGROUP).append(TAB);
out.append(group);
out.append(newline);
}
for (int i = 0; i < sequences.length; i++)
{
String sequenceName = sequences[i].getName();
- List<SequenceFeature> features = new ArrayList<SequenceFeature>();
- if (types.length > 0)
+ List<SequenceFeature> features = new ArrayList<>();
+ if (featureTypes.length > 0)
{
features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
- true, group, types));
+ true, group, featureTypes));
}
for (SequenceFeature sequenceFeature : features)
if (isNamedGroup)
{
- out.append("ENDGROUP").append(TAB);
+ out.append(ENDGROUP).append(TAB);
out.append(group);
out.append(newline);
}
}
-
- return foundSome ? out.toString() : "No Features Visible";
+ return foundSome;
}
/**
dataset = new Alignment(new SequenceI[] {});
}
- Map<String, FeatureColourI> featureColours = new HashMap<String, FeatureColourI>();
+ Map<String, FeatureColourI> featureColours = new HashMap<>();
boolean parseResult = parse(dataset, featureColours, false, true);
if (!parseResult)
{
for (SequenceI seq : sequences)
{
- List<SequenceFeature> features = new ArrayList<SequenceFeature>();
+ List<SequenceFeature> features = new ArrayList<>();
if (includeNonPositionalFeatures)
{
features.addAll(seq.getFeatures().getNonPositionalFeatures());
fromCount = Integer.parseInt(tokens[2]);
} catch (NumberFormatException nfe)
{
- throw new IOException("Invalid number in Align field: "
- + nfe.getMessage());
+ throw new IOException(
+ "Invalid number in Align field: " + nfe.getMessage());
}
/*
* @param newseqs
* @throws IOException
*/
- protected void processGffPragma(String line,
- Map<String, String> gffProps, AlignmentI align,
- List<SequenceI> newseqs) throws IOException
+ protected void processGffPragma(String line, Map<String, String> gffProps,
+ AlignmentI align, List<SequenceI> newseqs) throws IOException
{
line = line.trim();
if ("###".equals(line))