import jalview.datamodel.features.FeatureMatcherSet;
import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.gui.Desktop;
-import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import jalview.schemes.FeatureColour;
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String EQUALS = "=";
+
private static final String TAB_REGEX = "\\t";
private static final String STARTGROUP = "STARTGROUP";
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
- private static final String NOTE = "Note";
-
protected static final String GFF_VERSION = "##gff-version";
private AlignmentI lastmatchedAl = null;
}
/**
- * Outputs any visible complementary positional features, within feature group
+ * Outputs any visible complementary (CDS/peptide) positional features as
+ * Jalview format, within feature group. The coordinates of the linked features
+ * are converted to the corresponding positions of the local sequences.
*
* @param out
* @param fr
.getFeatureRenderer();
/*
- * build a map of {group, {seqName, List<SequenceFeature>}}
+ * bin features by feature group and sequence
*/
- Map<String, Map<String, List<SequenceFeature>>> map = new TreeMap<>();
+ Map<String, Map<String, List<SequenceFeature>>> map = new TreeMap<>(
+ String.CASE_INSENSITIVE_ORDER);
int count = 0;
for (SequenceI seq : sequences)
{
/*
- * avoid duplication of features (e.g. peptide feature
- * at all 3 mapped codon positions)
+ * find complementary features
*/
- List<SequenceFeature> found = new ArrayList<>();
+ List<SequenceFeature> complementary = findComplementaryFeatures(seq,
+ fr2);
String seqName = seq.getName();
- for (int pos = seq.getStart(); pos <= seq.getEnd(); pos++)
+ for (SequenceFeature sf : complementary)
{
- MappedFeatures mf = fr2.findComplementFeaturesAtResidue(seq, pos);
-
- if (mf != null)
+ String group = sf.getFeatureGroup();
+ if (!map.containsKey(group))
{
- MapList mapping = mf.mapping.getMap();
- for (SequenceFeature sf : mf.features)
- {
- String group = sf.getFeatureGroup();
- if (group == null)
- {
- group = "";
- }
- if (!map.containsKey(group))
- {
- map.put(group, new LinkedHashMap<>());
- }
- Map<String, List<SequenceFeature>> groupFeatures = map
- .get(group);
- if (!groupFeatures.containsKey(seqName))
- {
- groupFeatures.put(seqName, new ArrayList<>());
- }
- List<SequenceFeature> foundFeatures = groupFeatures
- .get(seqName);
-
- /*
- * make a virtual feature with local coordinates
- */
- if (!found.contains(sf))
- {
- found.add(sf);
- int begin = sf.getBegin();
- int end = sf.getEnd();
- int[] range = mf.mapping.getTo() == seq.getDatasetSequence()
- ? mapping.locateInTo(begin, end)
- : mapping.locateInFrom(begin, end);
- SequenceFeature sf2 = new SequenceFeature(sf, range[0],
- range[1], group,
- sf.getScore());
- foundFeatures.add(sf2);
- count++;
- }
- }
+ map.put(group, new LinkedHashMap<>()); // preserves sequence order
}
+ Map<String, List<SequenceFeature>> groupFeatures = map.get(group);
+ if (!groupFeatures.containsKey(seqName))
+ {
+ groupFeatures.put(seqName, new ArrayList<>());
+ }
+ List<SequenceFeature> foundFeatures = groupFeatures.get(seqName);
+ foundFeatures.add(sf);
+ count++;
}
}
String sequenceName = seqFeatures.getKey();
for (SequenceFeature sf : seqFeatures.getValue())
{
- out.append(formatJalviewFeature(sequenceName, sf));
+ formatJalviewFeature(out, sequenceName, sf);
}
}
if (!"".equals(group))
}
/**
+ * Answers a list of mapped features visible in the (CDS/protein) complement,
+ * with feature positions translated to local sequence coordinates
+ *
+ * @param seq
+ * @param fr2
+ * @return
+ */
+ protected List<SequenceFeature> findComplementaryFeatures(SequenceI seq,
+         FeatureRenderer fr2)
+ {
+   /*
+    * features already converted; the same mapped feature can be reported
+    * at several residues (e.g. a peptide feature at all 3 mapped codon
+    * positions) but should be output only once
+    */
+   List<SequenceFeature> alreadySeen = new ArrayList<>();
+   List<SequenceFeature> localised = new ArrayList<>();
+
+   for (int residue = seq.getStart(); residue <= seq.getEnd(); residue++)
+   {
+     MappedFeatures mapped = fr2.findComplementFeaturesAtResidue(seq,
+             residue);
+     if (mapped == null)
+     {
+       continue;
+     }
+
+     MapList mapList = mapped.mapping.getMap();
+     for (SequenceFeature feature : mapped.features)
+     {
+       if (alreadySeen.contains(feature))
+       {
+         continue;
+       }
+
+       String featureGroup = feature.getFeatureGroup();
+       if (featureGroup == null)
+       {
+         featureGroup = "";
+       }
+       alreadySeen.add(feature);
+
+       /*
+        * make a virtual feature with local coordinates: the direction of
+        * the coordinate lookup depends on whether this sequence's dataset
+        * sequence is the 'to' end of the mapping
+        */
+       int featureStart = feature.getBegin();
+       int featureEnd = feature.getEnd();
+       int[] localRange;
+       if (mapped.mapping.getTo() == seq.getDatasetSequence())
+       {
+         localRange = mapList.locateInTo(featureStart, featureEnd);
+       }
+       else
+       {
+         localRange = mapList.locateInFrom(featureStart, featureEnd);
+       }
+       // NOTE(review): assumes the lookup never returns null - confirm
+       localised.add(new SequenceFeature(feature, localRange[0],
+               localRange[1], featureGroup, feature.getScore()));
+     }
+   }
+
+   return localised;
+ }
+
+ /**
* Outputs any feature filters defined for visible feature types, sandwiched by
* STARTFILTERS and ENDFILTERS lines
*
}
}
firstInGroup = false;
- out.append(formatJalviewFeature(sequenceName, sf));
+ formatJalviewFeature(out, sequenceName, sf);
}
}
}
}
/**
+ * Formats one feature in Jalview format and appends to the string buffer
+ *
* @param out
* @param sequenceName
* @param sequenceFeature
*/
- protected String formatJalviewFeature(
- String sequenceName, SequenceFeature sequenceFeature)
+ protected void formatJalviewFeature(
+ StringBuilder out, String sequenceName,
+ SequenceFeature sequenceFeature)
{
- StringBuilder out = new StringBuilder(64);
if (sequenceFeature.description == null
|| sequenceFeature.description.equals(""))
{
if (sequenceFeature.description.indexOf(href) == -1)
{
- out.append(" <a href=\"" + href + "\">" + label + "</a>");
+ out.append(" <a href=\"").append(href).append("\">")
+ .append(label).append("</a>");
}
}
out.append(sequenceFeature.score);
}
out.append(newline);
-
- return out.toString();
}
/**
FeatureRenderer fr, boolean includeNonPositionalFeatures,
boolean includeComplement)
{
+ FeatureRenderer fr2 = null;
+ if (includeComplement)
+ {
+ AlignViewportI comp = fr.getViewport().getCodingComplement();
+ fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer();
+ }
+
Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
StringBuilder out = new StringBuilder(256);
out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
- if (!includeNonPositionalFeatures
- && (visibleColours == null || visibleColours.isEmpty()))
- {
- return out.toString();
- }
-
String[] types = visibleColours == null ? new String[0]
: visibleColours.keySet()
.toArray(new String[visibleColours.keySet().size()]);
for (SequenceI seq : sequences)
{
+ List<SequenceFeature> seqFeatures = new ArrayList<>();
List<SequenceFeature> features = new ArrayList<>();
if (includeNonPositionalFeatures)
{
{
features.addAll(seq.getFeatures().getPositionalFeatures(types));
}
-
for (SequenceFeature sf : features)
{
- if (!sf.isNonPositional() && !fr.isVisible(sf))
+ if (sf.isNonPositional() || fr.isVisible(sf))
{
/*
- * feature hidden by group visibility, colour threshold,
+ * drop features hidden by group visibility, colour threshold,
* or feature filter condition
*/
- continue;
- }
-
- String source = sf.featureGroup;
- if (source == null)
- {
- source = sf.getDescription();
+ seqFeatures.add(sf);
}
+ }
- out.append(seq.getName());
- out.append(TAB);
- out.append(source);
- out.append(TAB);
- out.append(sf.type);
- out.append(TAB);
- out.append(sf.begin);
- out.append(TAB);
- out.append(sf.end);
- out.append(TAB);
- out.append(sf.score);
- out.append(TAB);
-
- int strand = sf.getStrand();
- out.append(strand == 1 ? "+" : (strand == -1 ? "-" : "."));
- out.append(TAB);
-
- String phase = sf.getPhase();
- out.append(phase == null ? "." : phase);
-
- // miscellaneous key-values (GFF column 9)
- String attributes = sf.getAttributes();
- if (attributes != null)
- {
- out.append(TAB).append(attributes);
- }
+ if (includeComplement)
+ {
+ seqFeatures.addAll(findComplementaryFeatures(seq, fr2));
+ }
+ /*
+ * sort features here if wanted
+ */
+ for (SequenceFeature sf : seqFeatures)
+ {
+ formatGffFeature(out, seq, sf);
out.append(newline);
}
}
}
/**
+ * Formats one feature as GFF and appends to the string buffer
+ */
+ private void formatGffFeature(StringBuilder out, SequenceI seq,
+         SequenceFeature sf)
+ {
+   /*
+    * second column: the feature group, or the feature description
+    * if no group is set
+    */
+   String source = sf.featureGroup != null ? sf.featureGroup
+           : sf.getDescription();
+
+   /*
+    * tab-terminated columns: sequence name, source, type, begin, end, score
+    */
+   out.append(seq.getName()).append(TAB);
+   out.append(source).append(TAB);
+   out.append(sf.type).append(TAB);
+   out.append(sf.begin).append(TAB);
+   out.append(sf.end).append(TAB);
+   out.append(sf.score).append(TAB);
+
+   /*
+    * strand is written as + (1), - (-1) or . (any other value)
+    */
+   String strandSymbol;
+   switch (sf.getStrand())
+   {
+   case 1:
+     strandSymbol = "+";
+     break;
+   case -1:
+     strandSymbol = "-";
+     break;
+   default:
+     strandSymbol = ".";
+     break;
+   }
+   out.append(strandSymbol).append(TAB);
+
+   /*
+    * phase, or . if none is recorded
+    */
+   String phase = sf.getPhase();
+   if (phase == null)
+   {
+     out.append(".");
+   }
+   else
+   {
+     out.append(phase);
+   }
+
+   /*
+    * any other attributes held on the feature follow as a further column
+    */
+   Map<String, Object> details = sf.otherDetails;
+   if (details != null && !details.isEmpty())
+   {
+     formatAttributes(out, details);
+   }
+ }
+
+ /**
+ * A helper method that outputs attributes stored in the map as
+ * semicolon-delimited values e.g.
+ *
+ * <pre>
+ * AC_Male=0;AF_NFE=0.00000e+00;Hom_FIN=0;GQ_MEDIAN=9
+ * </pre>
+ *
+ * A map-valued attribute is formatted as a comma-delimited list within braces,
+ * for example
+ *
+ * <pre>
+ * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+ * </pre>
+ *
+ * The {@code jvmap_} prefix designates a values map and is removed if the value
+ * is parsed when read in. (The GFF3 specification allows 'semi-structured data'
+ * to be represented provided the attribute name begins with a lower case
+ * letter.)
+ *
+ * @param sb
+ * @param map
+ * @see http://gmod.org/wiki/GFF3#GFF3_Format
+ */
+ void formatAttributes(StringBuilder sb, Map<String, Object> map)
+ {
+   /*
+    * the attributes column is always introduced by a tab, even if every
+    * entry is skipped below
+    */
+   sb.append(TAB);
+   boolean first = true;
+   for (Entry<String, Object> entry : map.entrySet())
+   {
+     String key = entry.getKey();
+     if (SequenceFeature.STRAND.equals(key)
+             || SequenceFeature.PHASE.equals(key))
+     {
+       /*
+        * values stashed in map but output to their own columns
+        */
+       continue;
+     }
+
+     Object value = entry.getValue();
+     boolean isMap = value instanceof Map<?, ?>;
+     if (value == null || (isMap && ((Map<?, ?>) value).isEmpty()))
+     {
+       /*
+        * nothing would be written for this entry (formatMapAttribute
+        * ignores an empty map), so skip it before emitting a separator,
+        * otherwise the output gets a leading or doubled ';'; a null value
+        * is skipped rather than throwing NullPointerException
+        */
+       continue;
+     }
+
+     if (!first)
+     {
+       sb.append(";");
+     }
+     first = false;
+
+     if (isMap)
+     {
+       /*
+        * NOTE(review): the Javadoc describes a jvmap_ prefix and braces
+        * for map values, but formatMapAttribute writes
+        * key=key1=value1,key2=value2 - confirm which is intended
+        */
+       formatMapAttribute(sb, key, (Map<?, ?>) value);
+     }
+     else
+     {
+       /*
+        * encode the value using the GFF_ENCODABLE character set
+        */
+       String formatted = StringUtils.urlEncode(value.toString(),
+               GffHelperI.GFF_ENCODABLE);
+       sb.append(key).append(EQUALS).append(formatted);
+     }
+   }
+ }
+
+ /**
+ * Formats the map entries as
+ *
+ * <pre>
+ * key=key1=value1,key2=value2,...
+ * </pre>
+ *
+ * and appends this to the string buffer
+ *
+ * @param sb
+ * @param key
+ * @param map
+ */
+ private void formatMapAttribute(StringBuilder sb, String key,
+         Map<?, ?> map)
+ {
+   /*
+    * nothing at all is written for a null or empty map
+    */
+   if (map == null || map.isEmpty())
+   {
+     return;
+   }
+
+   /*
+    * the format is coded explicitly rather than relying on
+    * AbstractMap.toString, in case toString changes in future
+    */
+   sb.append(key);
+   sb.append(EQUALS);
+
+   /*
+    * sub-entries are comma-separated name=value pairs;
+    * only the values are url-encoded
+    */
+   String separator = "";
+   for (Entry<?, ?> subEntry : map.entrySet())
+   {
+     sb.append(separator);
+     separator = ",";
+     sb.append(subEntry.getKey().toString());
+     sb.append(EQUALS);
+     sb.append(StringUtils.urlEncode(subEntry.getValue().toString(),
+             GffHelperI.GFF_ENCODABLE));
+   }
+ }
+
+ /**
* Returns a mapping given list of one or more Align descriptors (exonerate
* format)
*
* @param alignedRegions
- * a list of "Align fromStart toStart fromCount"
+ * a list of "Align fromStart toStart fromCount"
* @param mapIsFromCdna
- * if true, 'from' is dna, else 'from' is protein
+ * if true, 'from' is dna, else 'from' is protein
* @param strand
- * either 1 (forward) or -1 (reverse)
+ * either 1 (forward) or -1 (reverse)
* @return
* @throws IOException
*/
}
/**
- * Process the 'column 9' data of the GFF file. This is less formally defined,
- * and its interpretation will vary depending on the tool that has generated
- * it.
- *
- * @param attributes
- * @param sf
- */
- protected void processGffColumnNine(String attributes, SequenceFeature sf)
- {
- sf.setAttributes(attributes);
-
- /*
- * Parse attributes in column 9 and add them to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
- */
- char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
- // TODO check we don't break GFF2 values which include commas here
- Map<String, List<String>> nameValues = GffHelperBase
- .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
- for (Entry<String, List<String>> attr : nameValues.entrySet())
- {
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- "; ");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
- {
- sf.setDescription(values);
- }
- }
- }
-
- /**
* After encountering ##fasta in a GFF3 file, process the remainder of the
* file as FASTA sequence data. Any placeholder sequences created during
* feature parsing are updated with the actual sequences.