*/
package jalview.io;
+import java.awt.Color;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
import jalview.analysis.AlignmentUtils;
import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignViewportI;
import jalview.api.FeatureColourI;
+import jalview.api.FeatureRenderer;
import jalview.api.FeaturesSourceI;
import jalview.datamodel.AlignedCodonFrame;
import jalview.datamodel.Alignment;
import jalview.datamodel.AlignmentI;
+import jalview.datamodel.MappedFeatures;
import jalview.datamodel.SequenceDummy;
import jalview.datamodel.SequenceFeature;
import jalview.datamodel.SequenceI;
import jalview.datamodel.features.FeatureMatcherSet;
import jalview.datamodel.features.FeatureMatcherSetI;
-import jalview.io.gff.GffHelperBase;
+import jalview.gui.Desktop;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import jalview.schemes.FeatureColour;
import jalview.util.ParseHtmlBodyAndLinks;
import jalview.util.StringUtils;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-
/**
* Parses and writes features files, which may be in Jalview, GFF2 or GFF3
* format. These are tab-delimited formats but with differences in the use of
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String EQUALS = "=";
+
private static final String TAB_REGEX = "\\t";
private static final String STARTGROUP = "STARTGROUP";
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
- private static final String NOTE = "Note";
-
protected static final String GFF_VERSION = "##gff-version";
private AlignmentI lastmatchedAl = null;
float score = Float.NaN;
try
{
- score = new Float(gffColumns[6]).floatValue();
+ score = Float.valueOf(gffColumns[6]).floatValue();
} catch (NumberFormatException ex)
{
sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup);
* (with no check on type or group).
*
* @param sequences
- * source of features
- * @param visible
- * map of colour for each visible feature type
- * @param featureFilters
- * @param visibleFeatureGroups
+ * @param fr
* @param includeNonPositional
- * if true, include non-positional features (regardless of group or
- * type)
+ * if true, include non-positional features
+ * (regardless of group or type)
+ * @param includeComplement
+ * if true, include visible complementary
+ * (CDS/protein) positional features, with
+ * locations converted to local sequence
+ * coordinates
* @return
*/
public String printJalviewFormat(SequenceI[] sequences,
- Map<String, FeatureColourI> visible,
- Map<String, FeatureMatcherSetI> featureFilters,
- List<String> visibleFeatureGroups, boolean includeNonPositional)
+ FeatureRenderer fr, boolean includeNonPositional,
+ boolean includeComplement)
{
- if (!includeNonPositional && (visible == null || visible.isEmpty()))
- {
- // no point continuing.
- return "No Features Visible";
- }
+ Map<String, FeatureColourI> visibleColours = fr
+ .getDisplayedFeatureCols();
+ Map<String, FeatureMatcherSetI> featureFilters = fr.getFeatureFilters();
/*
* write out feature colours (if we know them)
*/
// TODO: decide if feature links should also be written here ?
StringBuilder out = new StringBuilder(256);
- if (visible != null)
+ if (visibleColours != null)
{
- for (Entry<String, FeatureColourI> featureColour : visible.entrySet())
+ for (Entry<String, FeatureColourI> featureColour : visibleColours
+ .entrySet())
{
FeatureColourI colour = featureColour.getValue();
out.append(colour.toJalviewFormat(featureColour.getKey())).append(
}
}
- String[] types = visible == null ? new String[0] : visible.keySet()
- .toArray(new String[visible.keySet().size()]);
+ String[] types = visibleColours == null ? new String[0]
+ : visibleColours.keySet()
+ .toArray(new String[visibleColours.keySet().size()]);
/*
* feature filters if any
*/
- outputFeatureFilters(out, visible, featureFilters);
+ outputFeatureFilters(out, visibleColours, featureFilters);
/*
- * sort groups alphabetically, and ensure that features with a
- * null or empty group are output after those in named groups
+ * output features within groups
*/
- List<String> sortedGroups = new ArrayList<>(visibleFeatureGroups);
- sortedGroups.remove(null);
- sortedGroups.remove("");
- Collections.sort(sortedGroups);
- sortedGroups.add(null);
- sortedGroups.add("");
+ int count = outputFeaturesByGroup(out, fr, types, sequences,
+ includeNonPositional);
+
+ if (includeComplement)
+ {
+ count += outputComplementFeatures(out, fr, sequences);
+ }
+
+ return count > 0 ? out.toString() : "No Features Visible";
+ }
- boolean foundSome = false;
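+ /**
+ * Outputs any visible complementary (CDS/peptide) positional features in
+ * Jalview format, grouped by feature group. The coordinates of the linked
+ * features are converted to the corresponding positions of the local sequences.
+ *
+ * @param out
+ *          the buffer to which the formatted features are appended
+ * @param fr
+ *          the feature renderer whose viewport's coding complement supplies
+ *          the features
+ * @param sequences
+ *          the local sequences for which complementary features are sought
+ * @return the number of complementary features written
+ */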
+ private int outputComplementFeatures(StringBuilder out,
+ FeatureRenderer fr, SequenceI[] sequences)
+ {
+ AlignViewportI comp = fr.getViewport().getCodingComplement();
+ FeatureRenderer fr2 = Desktop.getAlignFrameFor(comp)
+ .getFeatureRenderer();
/*
- * first output any non-positional features
+ * bin features by feature group and sequence
*/
- if (includeNonPositional)
+ Map<String, Map<String, List<SequenceFeature>>> map = new TreeMap<>(
+ String.CASE_INSENSITIVE_ORDER);
+ int count = 0;
+
+ for (SequenceI seq : sequences)
{
- for (int i = 0; i < sequences.length; i++)
+ /*
+ * find complementary features
+ */
+ List<SequenceFeature> complementary = findComplementaryFeatures(seq,
+ fr2);
+ String seqName = seq.getName();
+
+ for (SequenceFeature sf : complementary)
{
- String sequenceName = sequences[i].getName();
- for (SequenceFeature feature : sequences[i].getFeatures()
- .getNonPositionalFeatures())
+ String group = sf.getFeatureGroup();
+ if (!map.containsKey(group))
+ {
+ map.put(group, new LinkedHashMap<>()); // preserves sequence order
+ }
+ Map<String, List<SequenceFeature>> groupFeatures = map.get(group);
+ if (!groupFeatures.containsKey(seqName))
+ {
+ groupFeatures.put(seqName, new ArrayList<>());
+ }
+ List<SequenceFeature> foundFeatures = groupFeatures.get(seqName);
+ foundFeatures.add(sf);
+ count++;
+ }
+ }
+
+ /*
+ * output features by group
+ */
+ for (Entry<String, Map<String, List<SequenceFeature>>> groupFeatures : map.entrySet())
+ {
+ out.append(newline);
+ String group = groupFeatures.getKey();
+ if (!"".equals(group))
+ {
+ out.append(STARTGROUP).append(TAB).append(group).append(newline);
+ }
+ Map<String, List<SequenceFeature>> seqFeaturesMap = groupFeatures
+ .getValue();
+ for (Entry<String, List<SequenceFeature>> seqFeatures : seqFeaturesMap
+ .entrySet())
+ {
+ String sequenceName = seqFeatures.getKey();
+ for (SequenceFeature sf : seqFeatures.getValue())
{
- foundSome = true;
- out.append(formatJalviewFeature(sequenceName, feature));
+ formatJalviewFeature(out, sequenceName, sf);
}
}
+ if (!"".equals(group))
+ {
+ out.append(ENDGROUP).append(TAB).append(group).append(newline);
+ }
}
+ return count;
+ }
+
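+ /**
+ * Answers a list of mapped features visible in the (CDS/protein) complement,
+ * with feature positions translated to local sequence coordinates. Each
+ * complement feature is included once only, however many positions of the
+ * sequence it is found at.
+ *
+ * @param seq
+ *          the local sequence, scanned from its start to its end position
+ * @param fr2
+ *          the feature renderer queried for complement features at each
+ *          position
+ * @return a (possibly empty) list of copies of the features found, each with
+ *         its range converted to local positions
+ */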
+ protected List<SequenceFeature> findComplementaryFeatures(SequenceI seq,
+ FeatureRenderer fr2)
+ {
/*
- * positional features within groups
+ * avoid duplication of features (e.g. peptide feature
+ * at all 3 mapped codon positions)
*/
- foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences);
+ List<SequenceFeature> found = new ArrayList<>();
+ List<SequenceFeature> complementary = new ArrayList<>();
- return foundSome ? out.toString() : "No Features Visible";
+ for (int pos = seq.getStart(); pos <= seq.getEnd(); pos++)
+ {
+ MappedFeatures mf = fr2.findComplementFeaturesAtResidue(seq, pos);
+
+ if (mf != null)
+ {
+ for (SequenceFeature sf : mf.features)
+ {
+ /*
+ * make a virtual feature with local coordinates
+ */
+ if (!found.contains(sf))
+ {
+ String group = sf.getFeatureGroup();
+ if (group == null)
+ {
+ group = "";
+ }
+ found.add(sf);
+ int begin = sf.getBegin();
+ int end = sf.getEnd();
+ int[] range = mf.getMappedPositions(begin, end);
+ SequenceFeature sf2 = new SequenceFeature(sf, range[0],
+ range[1], group, sf.getScore());
+ complementary.add(sf2);
+ }
+ }
+ }
+ }
+
+ return complementary;
}
/**
}
if (!first)
{
- out.append(ENDFILTERS).append(newline).append(newline);
+ out.append(ENDFILTERS).append(newline);
}
}
/**
- * Appends output of sequence features within feature groups to the output
- * buffer. Groups other than the null or empty group are sandwiched by
- * STARTGROUP and ENDGROUP lines.
+ * Appends output of visible sequence features within feature groups to the
+ * output buffer. Groups other than the null or empty group are sandwiched by
+ * STARTGROUP and ENDGROUP lines. Answers the number of features written.
*
* @param out
- * @param groups
+ * @param fr
* @param featureTypes
* @param sequences
+ * @param includeNonPositional
* @return
*/
- private boolean outputFeaturesByGroup(StringBuilder out,
- List<String> groups, String[] featureTypes, SequenceI[] sequences)
+ private int outputFeaturesByGroup(StringBuilder out,
+ FeatureRenderer fr, String[] featureTypes,
+ SequenceI[] sequences, boolean includeNonPositional)
{
- boolean foundSome = false;
- for (String group : groups)
+ List<String> featureGroups = fr.getFeatureGroups();
+
+ /*
+ * sort groups alphabetically, and ensure that features with a
+ * null or empty group are output after those in named groups
+ */
+ List<String> sortedGroups = new ArrayList<>(featureGroups);
+ sortedGroups.remove(null);
+ sortedGroups.remove("");
+ Collections.sort(sortedGroups);
+ sortedGroups.add(null);
+ sortedGroups.add("");
+
+ int count = 0;
+ List<String> visibleGroups = fr.getDisplayedFeatureGroups();
+
+ /*
+ * loop over all groups (may be visible or not);
+ * non-positional features are output even if group is not visible
+ */
+ for (String group : sortedGroups)
{
- boolean isNamedGroup = (group != null && !"".equals(group));
- if (isNamedGroup)
- {
- out.append(newline);
- out.append(STARTGROUP).append(TAB);
- out.append(group);
- out.append(newline);
- }
+ boolean firstInGroup = true;
+ boolean isNullGroup = group == null || "".equals(group);
- /*
- * output positional features within groups
- */
for (int i = 0; i < sequences.length; i++)
{
String sequenceName = sequences[i].getName();
List<SequenceFeature> features = new ArrayList<>();
- if (featureTypes.length > 0)
+
+ /*
+ * get any non-positional features in this group, if wanted
+ * (for any feature type, whether visible or not)
+ */
+ if (includeNonPositional)
+ {
+ features.addAll(sequences[i].getFeatures()
+ .getFeaturesForGroup(false, group));
+ }
+
+ /*
+ * add positional features for visible feature types, but
+ * (for named groups) only if feature group is visible
+ */
+ if (featureTypes.length > 0
+ && (isNullGroup || visibleGroups.contains(group)))
{
features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
true, group, featureTypes));
}
- for (SequenceFeature sequenceFeature : features)
+ for (SequenceFeature sf : features)
{
- foundSome = true;
- out.append(formatJalviewFeature(sequenceName, sequenceFeature));
+ if (sf.isNonPositional() || fr.isVisible(sf))
+ {
+ count++;
+ if (firstInGroup)
+ {
+ out.append(newline);
+ if (!isNullGroup)
+ {
+ out.append(STARTGROUP).append(TAB).append(group)
+ .append(newline);
+ }
+ }
+ firstInGroup = false;
+ formatJalviewFeature(out, sequenceName, sf);
+ }
}
}
- if (isNamedGroup)
+ if (!isNullGroup && !firstInGroup)
{
- out.append(ENDGROUP).append(TAB);
- out.append(group);
- out.append(newline);
+ out.append(ENDGROUP).append(TAB).append(group).append(newline);
}
}
- return foundSome;
+ return count;
}
/**
+ * Formats one feature in Jalview format and appends to the string buffer
+ *
* @param out
* @param sequenceName
* @param sequenceFeature
*/
- protected String formatJalviewFeature(
- String sequenceName, SequenceFeature sequenceFeature)
+ protected void formatJalviewFeature(
+ StringBuilder out, String sequenceName,
+ SequenceFeature sequenceFeature)
{
- StringBuilder out = new StringBuilder(64);
if (sequenceFeature.description == null
|| sequenceFeature.description.equals(""))
{
if (sequenceFeature.description.indexOf(href) == -1)
{
- out.append(" <a href=\"" + href + "\">" + label + "</a>");
+ out.append(" <a href=\"").append(href).append("\">")
+ .append(label).append("</a>");
}
}
out.append(sequenceFeature.score);
}
out.append(newline);
-
- return out.toString();
}
/**
* Returns features output in GFF2 format
*
* @param sequences
- * the sequences whose features are to be output
+ * the sequences whose features are to be
+ * output
* @param visible
- * a map whose keys are the type names of visible features
+ * a map whose keys are the type names of
+ * visible features
* @param visibleFeatureGroups
* @param includeNonPositionalFeatures
+ * @param includeComplement
* @return
*/
public String printGffFormat(SequenceI[] sequences,
- Map<String, FeatureColourI> visible,
- List<String> visibleFeatureGroups,
- boolean includeNonPositionalFeatures)
+ FeatureRenderer fr, boolean includeNonPositionalFeatures,
+ boolean includeComplement)
{
+ FeatureRenderer fr2 = null;
+ if (includeComplement)
+ {
+ AlignViewportI comp = fr.getViewport().getCodingComplement();
+ fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer();
+ }
+
+ Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
+
StringBuilder out = new StringBuilder(256);
out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
- if (!includeNonPositionalFeatures
- && (visible == null || visible.isEmpty()))
- {
- return out.toString();
- }
-
- String[] types = visible == null ? new String[0] : visible.keySet()
- .toArray(
- new String[visible.keySet().size()]);
+ String[] types = visibleColours == null ? new String[0]
+ : visibleColours.keySet()
+ .toArray(new String[visibleColours.keySet().size()]);
for (SequenceI seq : sequences)
{
+ List<SequenceFeature> seqFeatures = new ArrayList<>();
List<SequenceFeature> features = new ArrayList<>();
if (includeNonPositionalFeatures)
{
features.addAll(seq.getFeatures().getNonPositionalFeatures());
}
- if (visible != null && !visible.isEmpty())
+ if (visibleColours != null && !visibleColours.isEmpty())
{
features.addAll(seq.getFeatures().getPositionalFeatures(types));
}
-
for (SequenceFeature sf : features)
{
- String source = sf.featureGroup;
- if (!sf.isNonPositional() && source != null
- && !visibleFeatureGroups.contains(source))
+ if (sf.isNonPositional() || fr.isVisible(sf))
{
- // group is not visible
- continue;
- }
-
- if (source == null)
- {
- source = sf.getDescription();
+ /*
+ * drop features hidden by group visibility, colour threshold,
+ * or feature filter condition
+ */
+ seqFeatures.add(sf);
}
+ }
- out.append(seq.getName());
- out.append(TAB);
- out.append(source);
- out.append(TAB);
- out.append(sf.type);
- out.append(TAB);
- out.append(sf.begin);
- out.append(TAB);
- out.append(sf.end);
- out.append(TAB);
- out.append(sf.score);
- out.append(TAB);
-
- int strand = sf.getStrand();
- out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
- out.append(TAB);
-
- String phase = sf.getPhase();
- out.append(phase == null ? "." : phase);
-
- // miscellaneous key-values (GFF column 9)
- String attributes = sf.getAttributes();
- if (attributes != null)
- {
- out.append(TAB).append(attributes);
- }
+ if (includeComplement)
+ {
+ seqFeatures.addAll(findComplementaryFeatures(seq, fr2));
+ }
+ /*
+ * sort features here if wanted
+ */
+ for (SequenceFeature sf : seqFeatures)
+ {
+ formatGffFeature(out, seq, sf);
out.append(newline);
}
}
}
/**
+ * Formats one feature as GFF and appends to the string buffer
+ */
+ private void formatGffFeature(StringBuilder out, SequenceI seq,
+         SequenceFeature sf)
+ {
+   /*
+    * source column: the feature group if there is one, else the
+    * description; if neither is set write the GFF 'undefined' placeholder
+    * rather than letting StringBuilder append the literal text "null"
+    */
+   String source = sf.featureGroup;
+   if (source == null)
+   {
+     source = sf.getDescription();
+   }
+   if (source == null)
+   {
+     source = ".";
+   }
+
+   /*
+    * seqname, source, type, start, end, score
+    */
+   out.append(seq.getName()).append(TAB);
+   out.append(source).append(TAB);
+   out.append(sf.type).append(TAB);
+   out.append(sf.begin).append(TAB);
+   out.append(sf.end).append(TAB);
+   out.append(sf.score).append(TAB);
+
+   /*
+    * strand is '+' for 1, '-' for -1, otherwise '.'
+    */
+   int strand = sf.getStrand();
+   String strandSymbol = ".";
+   if (strand == 1)
+   {
+     strandSymbol = "+";
+   }
+   else if (strand == -1)
+   {
+     strandSymbol = "-";
+   }
+   out.append(strandSymbol).append(TAB);
+
+   /*
+    * phase, or '.' if none is recorded
+    */
+   String phase = sf.getPhase();
+   out.append(phase == null ? "." : phase);
+
+   /*
+    * the attributes column is only written if the feature holds any
+    * other details; no line terminator is appended here
+    */
+   Map<String, Object> attributes = sf.otherDetails;
+   if (attributes != null && !attributes.isEmpty())
+   {
+     formatAttributes(out, attributes);
+   }
+ }
+
+ /**
+ * A helper method that appends a tab, followed by the attributes stored in
+ * the map as semicolon-delimited {@code key=value} pairs, e.g.
+ *
+ * <pre>
+ * AC_Male=0;AF_NFE=0.00000e 00;Hom_FIN=0;GQ_MEDIAN=9
+ * </pre>
+ *
+ * Entries keyed by {@code SequenceFeature.STRAND} or
+ * {@code SequenceFeature.PHASE} are skipped, as these are output to their own
+ * columns. Entries with a null value, or whose value is an empty map, are
+ * also skipped, so that no empty field or dangling ';' separator is written.
+ * <p>
+ * A map-valued attribute is written by {@code formatMapAttribute}; any other
+ * value is written as its {@code toString()} value passed through
+ * {@code StringUtils.urlEncode}.
+ * <p>
+ * NOTE(review): this comment previously described map values as written
+ * within braces with a {@code jvmap_} name prefix, for example
+ *
+ * <pre>
+ * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+ * </pre>
+ *
+ * where the prefix designates a values map and is removed if the value is
+ * parsed when read in. (The GFF3 specification allows 'semi-structured data'
+ * to be represented provided the attribute name begins with a lower case
+ * letter.) Neither the prefix nor the braces are added by this method - TODO
+ * confirm which form the GFF reader expects.
+ *
+ * @param sb
+ *          the buffer to append to
+ * @param map
+ *          the attributes to write (must not be null)
+ * @see <a href="http://gmod.org/wiki/GFF3#GFF3_Format">GFF3 format</a>
+ */
+ void formatAttributes(StringBuilder sb, Map<String, Object> map)
+ {
+   sb.append(TAB);
+   boolean first = true;
+   for (Entry<String, Object> attribute : map.entrySet())
+   {
+     String key = attribute.getKey();
+     if (SequenceFeature.STRAND.equals(key)
+             || SequenceFeature.PHASE.equals(key))
+     {
+       /*
+        * values stashed in map but output to their own columns
+        */
+       continue;
+     }
+
+     Object value = attribute.getValue();
+     boolean isMap = value instanceof Map<?, ?>;
+     if (value == null || (isMap && ((Map<?, ?>) value).isEmpty()))
+     {
+       /*
+        * nothing to write for this key; skip it before the separator
+        * is written, so as not to output ";;" or a leading ";"
+        */
+       continue;
+     }
+
+     if (!first)
+     {
+       sb.append(";");
+     }
+     first = false;
+
+     if (isMap)
+     {
+       formatMapAttribute(sb, key, (Map<?, ?>) value);
+     }
+     else
+     {
+       String formatted = StringUtils.urlEncode(value.toString(),
+               GffHelperI.GFF_ENCODABLE);
+       sb.append(key).append(EQUALS).append(formatted);
+     }
+   }
+ }
+
+ /**
+ * Appends a map-valued attribute to the string buffer, in the form
+ *
+ * <pre>
+ * key=key1=value1,key2=value2,...
+ * </pre>
+ *
+ * where each value is its {@code toString()} value passed through
+ * {@code StringUtils.urlEncode}. Nothing is appended if the map is null or
+ * empty.
+ *
+ * @param sb
+ *          the buffer to append to
+ * @param key
+ *          the name of the attribute
+ * @param map
+ *          the entries that make up the attribute's value
+ */
+ private void formatMapAttribute(StringBuilder sb, String key,
+         Map<?, ?> map)
+ {
+   boolean nothingToWrite = (map == null) || map.isEmpty();
+   if (nothingToWrite)
+   {
+     return;
+   }
+
+   /*
+    * the format is coded explicitly here, rather than relying on
+    * AbstractMap.toString, in case toString changes in future
+    */
+   sb.append(key);
+   sb.append(EQUALS);
+
+   /*
+    * the separator is empty before the first entry, a comma thereafter
+    */
+   String separator = "";
+   for (Entry<?, ?> keyValue : map.entrySet())
+   {
+     sb.append(separator);
+     separator = ",";
+     sb.append(keyValue.getKey().toString());
+     sb.append(EQUALS);
+     String rawValue = keyValue.getValue().toString();
+     sb.append(StringUtils.urlEncode(rawValue, GffHelperI.GFF_ENCODABLE));
+   }
+ }
+
+ /**
* Returns a mapping given list of one or more Align descriptors (exonerate
* format)
*
* @param alignedRegions
- * a list of "Align fromStart toStart fromCount"
+ * a list of "Align fromStart toStart fromCount"
* @param mapIsFromCdna
- * if true, 'from' is dna, else 'from' is protein
+ * if true, 'from' is dna, else 'from' is protein
* @param strand
- * either 1 (forward) or -1 (reverse)
+ * either 1 (forward) or -1 (reverse)
* @return
* @throws IOException
*/
}
/**
- * Process the 'column 9' data of the GFF file. This is less formally defined,
- * and its interpretation will vary depending on the tool that has generated
- * it.
- *
- * @param attributes
- * @param sf
- */
- protected void processGffColumnNine(String attributes, SequenceFeature sf)
- {
- sf.setAttributes(attributes);
-
- /*
- * Parse attributes in column 9 and add them to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
- */
- char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
- // TODO check we don't break GFF2 values which include commas here
- Map<String, List<String>> nameValues = GffHelperBase
- .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
- for (Entry<String, List<String>> attr : nameValues.entrySet())
- {
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- "; ");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
- {
- sf.setDescription(values);
- }
- }
- }
-
- /**
* After encountering ##fasta in a GFF3 file, process the remainder of the
 * file as FASTA sequence data. Any placeholder sequences created during
* feature parsing are updated with the actual sequences.