*/
package jalview.io;
+import java.awt.Color;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
import jalview.analysis.AlignmentUtils;
import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignViewportI;
import jalview.datamodel.features.FeatureMatcherSet;
import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.gui.Desktop;
-import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import jalview.schemes.FeatureColour;
import jalview.util.ParseHtmlBodyAndLinks;
import jalview.util.StringUtils;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-
/**
* Parses and writes features files, which may be in Jalview, GFF2 or GFF3
* format. These are tab-delimited formats but with differences in the use of
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String EQUALS = "=";
+
private static final String TAB_REGEX = "\\t";
private static final String STARTGROUP = "STARTGROUP";
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
- private static final String NOTE = "Note";
-
protected static final String GFF_VERSION = "##gff-version";
private AlignmentI lastmatchedAl = null;
/**
* Constructor which does not parse the file immediately
*
- * @param file
+ * @param file File or String filename
* @param paste
* @throws IOException
*/
- public FeaturesFile(String file, DataSourceType paste)
+ public FeaturesFile(Object file, DataSourceType paste)
throws IOException
{
super(false, file, paste);
* @param type
* @throws IOException
*/
- public FeaturesFile(boolean parseImmediately, String file,
+ public FeaturesFile(boolean parseImmediately, Object file,
DataSourceType type) throws IOException
{
super(parseImmediately, file, type);
return count;
}
+ /**
+ * Answers a list of mapped features visible in the (CDS/protein) complement,
+ * with feature positions translated to local sequence coordinates
+ *
+ * @param seq
+ * @param fr2
+ * @return
+ */
protected List<SequenceFeature> findComplementaryFeatures(SequenceI seq,
FeatureRenderer fr2)
{
if (mf != null)
{
- MapList mapping = mf.mapping.getMap();
for (SequenceFeature sf : mf.features)
{
/*
found.add(sf);
int begin = sf.getBegin();
int end = sf.getEnd();
- int[] range = mf.mapping.getTo() == seq.getDatasetSequence()
- ? mapping.locateInTo(begin, end)
- : mapping.locateInFrom(begin, end);
+ int[] range = mf.getMappedPositions(begin, end);
SequenceFeature sf2 = new SequenceFeature(sf, range[0],
range[1], group, sf.getScore());
complementary.add(sf2);
String phase = sf.getPhase();
out.append(phase == null ? "." : phase);
- // miscellaneous key-values (GFF column 9)
- String attributes = sf.getAttributes();
- if (attributes != null)
+ if (sf.otherDetails != null && !sf.otherDetails.isEmpty())
{
- out.append(TAB).append(attributes);
+ Map<String, Object> map = sf.otherDetails;
+ formatAttributes(out, map);
+ }
+ }
+
+ /**
+ * Appends a tab to the buffer, followed by the attributes held in the map
+ * written as semicolon-delimited {@code key=value} pairs, e.g.
+ *
+ * <pre>
+ * AC_Male=0;AF_NFE=0.00000e+00;Hom_FIN=0;GQ_MEDIAN=9
+ * </pre>
+ *
+ * Simple values are written as {@code value.toString()}, URL-encoded using
+ * {@code GffHelperI.GFF_ENCODABLE}. Entries keyed by
+ * {@code SequenceFeature.STRAND} or {@code SequenceFeature.PHASE} are skipped,
+ * as these are output in their own columns. Entries with a null value, or
+ * whose value is an empty map, are also skipped, so that no empty field or
+ * stray ';' separator is written for them.
+ * <p>
+ * A map-valued attribute is delegated to {@code formatMapAttribute}, which
+ * writes its entries as a comma-delimited list.
+ * <p>
+ * NOTE(review): this method was previously documented as writing map-valued
+ * attributes within braces and with a {@code jvmap_} prefix, for example
+ *
+ * <pre>
+ * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+ * </pre>
+ *
+ * with the {@code jvmap_} prefix designating a values map and being removed
+ * when the value is parsed on reading in. (The GFF3 specification allows
+ * 'semi-structured data' to be represented provided the attribute name begins
+ * with a lower case letter.) Neither the prefix nor the braces are added by
+ * this method - confirm which form the reader expects.
+ * <p>
+ * NOTE(review): the leading tab is written even if every entry is skipped -
+ * confirm that an empty final column is acceptable to readers.
+ *
+ * @param sb
+ * the buffer to append to
+ * @param map
+ * attribute values keyed by attribute name; must not be null
+ * @see http://gmod.org/wiki/GFF3#GFF3_Format
+ */
+ void formatAttributes(StringBuilder sb, Map<String, Object> map)
+ {
+ sb.append(TAB);
+ boolean first = true;
+ for (Entry<String, Object> attribute : map.entrySet())
+ {
+ String key = attribute.getKey();
+ if (SequenceFeature.STRAND.equals(key)
+ || SequenceFeature.PHASE.equals(key))
+ {
+ /*
+ * values stashed in map but output to their own columns
+ */
+ continue;
+ }
+
+ Object value = attribute.getValue();
+ boolean isMap = value instanceof Map<?, ?>;
+ if (value == null || (isMap && ((Map<?, ?>) value).isEmpty()))
+ {
+ /*
+ * nothing would be written for this entry, so skip it before
+ * a separator is output (avoids a stray ';' in the result)
+ */
+ continue;
+ }
+
+ if (!first)
+ {
+ sb.append(";");
+ }
+ first = false;
+
+ if (isMap)
+ {
+ formatMapAttribute(sb, key, (Map<?, ?>) value);
+ }
+ else
+ {
+ String formatted = StringUtils.urlEncode(value.toString(),
+ GffHelperI.GFF_ENCODABLE);
+ sb.append(key).append(EQUALS).append(formatted);
+ }
+ }
+ }
+
+ /**
+ * Appends a map-valued attribute to the string buffer in the form
+ *
+ * <pre>
+ * key=key1=value1,key2=value2,...
+ * </pre>
+ *
+ * Each entry's value is written as its {@code toString()} form, URL-encoded
+ * using {@code GffHelperI.GFF_ENCODABLE}; entry keys are written as their
+ * {@code toString()} form without encoding. Nothing at all is appended if
+ * the map is null or empty.
+ *
+ * @param sb
+ * the buffer to append to
+ * @param key
+ * the name of the attribute
+ * @param map
+ * the entries that make up the attribute's value
+ */
+ private void formatMapAttribute(StringBuilder sb, String key,
+ Map<?, ?> map)
+ {
+ if (map == null || map.isEmpty())
+ {
+ return;
+ }
+
+ /*
+ * the format is coded explicitly here rather than relying on
+ * AbstractMap.toString, whose output is not guaranteed to stay the same
+ */
+ sb.append(key).append(EQUALS);
+
+ /*
+ * separator is empty before the first entry, a comma thereafter
+ */
+ String separator = "";
+ for (Entry<?, ?> entry : map.entrySet())
+ {
+ sb.append(separator);
+ separator = ",";
+ sb.append(entry.getKey().toString()).append(EQUALS)
+ .append(StringUtils.urlEncode(entry.getValue().toString(),
+ GffHelperI.GFF_ENCODABLE));
 }
 }
* format)
*
* @param alignedRegions
- * a list of "Align fromStart toStart fromCount"
+ * a list of "Align fromStart toStart fromCount"
* @param mapIsFromCdna
- * if true, 'from' is dna, else 'from' is protein
+ * if true, 'from' is dna, else 'from' is protein
* @param strand
- * either 1 (forward) or -1 (reverse)
+ * either 1 (forward) or -1 (reverse)
* @return
* @throws IOException
*/
}
/**
- * Process the 'column 9' data of the GFF file. This is less formally defined,
- * and its interpretation will vary depending on the tool that has generated
- * it.
- *
- * @param attributes
- * @param sf
- */
- protected void processGffColumnNine(String attributes, SequenceFeature sf)
- {
- sf.setAttributes(attributes);
-
- /*
- * Parse attributes in column 9 and add them to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
- */
- char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
- // TODO check we don't break GFF2 values which include commas here
- Map<String, List<String>> nameValues = GffHelperBase
- .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
- for (Entry<String, List<String>> attr : nameValues.entrySet())
- {
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- "; ");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
- {
- sf.setDescription(values);
- }
- }
- }
-
- /**
* After encountering ##fasta in a GFF3 file, process the remainder of the
* file as FAST sequence data. Any placeholder sequences created during
* feature parsing are updated with the actual sequences.