*/
package jalview.io;
+import java.awt.Color;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
import jalview.analysis.AlignmentUtils;
import jalview.analysis.SequenceIdMatcher;
import jalview.api.AlignViewportI;
import jalview.datamodel.features.FeatureMatcherSet;
import jalview.datamodel.features.FeatureMatcherSetI;
import jalview.gui.Desktop;
-import jalview.io.gff.GffHelperBase;
import jalview.io.gff.GffHelperFactory;
import jalview.io.gff.GffHelperI;
import jalview.schemes.FeatureColour;
import jalview.util.ParseHtmlBodyAndLinks;
import jalview.util.StringUtils;
-import java.awt.Color;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.TreeMap;
-
/**
* Parses and writes features files, which may be in Jalview, GFF2 or GFF3
* format. These are tab-delimited formats but with differences in the use of
*/
public class FeaturesFile extends AlignFile implements FeaturesSourceI
{
+ private static final String EQUALS = "=";
+
private static final String TAB_REGEX = "\\t";
private static final String STARTGROUP = "STARTGROUP";
private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED";
- private static final String NOTE = "Note";
-
protected static final String GFF_VERSION = "##gff-version";
private AlignmentI lastmatchedAl = null;
* Constructor which does not parse the file immediately
*
* @param file
+ * File or String filename
* @param paste
* @throws IOException
*/
- public FeaturesFile(String file, DataSourceType paste)
- throws IOException
+ public FeaturesFile(Object file, DataSourceType paste) throws IOException
{
// The first superclass argument is the parse-immediately flag (the sibling
// constructor passes its parseImmediately parameter in the same position);
// false here means construction alone does not parse the file.
super(false, file, paste);
}
* @param type
* @throws IOException
*/
- public FeaturesFile(boolean parseImmediately, String file,
+ public FeaturesFile(boolean parseImmediately, Object file,
DataSourceType type) throws IOException
{
super(parseImmediately, file, type);
// skip comments/process pragmas
if (line.length() == 0 || line.startsWith("#"))
{
- if (line.toLowerCase().startsWith("##"))
+ if (line.toLowerCase(Locale.ROOT).startsWith("##"))
{
processGffPragma(line, gffProps, align, newseqs);
}
// should report somewhere useful for UI if necessary
warningMessage = ((warningMessage == null) ? "" : warningMessage)
+ "Parsing error at\n" + line;
- System.out.println("Error parsing feature file: " + ex + "\n" + line);
+ jalview.bin.Console.outPrintln(
+ "Error parsing feature file: " + ex + "\n" + line);
ex.printStackTrace(System.err);
resetMatcher();
return false;
String line;
while ((line = nextLine()) != null)
{
- if (line.toUpperCase().startsWith(ENDFILTERS))
+ if (line.toUpperCase(Locale.ROOT).startsWith(ENDFILTERS))
{
return;
}
String[] tokens = line.split(TAB_REGEX);
if (tokens.length != 2)
{
- System.err.println(String.format("Invalid token count %d for %d",
- tokens.length, line));
+ /*
+  * the offending line is a String, so it must be formatted with %s;
+  * %d would throw IllegalFormatConversionException instead of logging
+  */
+ jalview.bin.Console.errPrintln(String.format(
+ "Invalid token count %d for %s", tokens.length, line));
}
else
{
*/
if (gffColumns.length < 6)
{
- System.err.println("Ignoring feature line '" + line
+ jalview.bin.Console.errPrintln("Ignoring feature line '" + line
+ "' with too few columns (" + gffColumns.length + ")");
return false;
}
seq = alignment.getSequenceAt(idx);
} catch (NumberFormatException ex)
{
- System.err.println("Invalid sequence index: " + seqIndex);
+ jalview.bin.Console
+ .errPrintln("Invalid sequence index: " + seqIndex);
}
}
if (seq == null)
{
- System.out.println("Sequence not found: " + line);
+ jalview.bin.Console.outPrintln("Sequence not found: " + line);
return false;
}
}
/**
- * Returns contents of a Jalview format features file, for visible features, as
- * filtered by type and group. Features with a null group are displayed if their
- * feature type is visible. Non-positional features may optionally be included
- * (with no check on type or group).
+ * Returns contents of a Jalview format features file, for visible features,
+ * as filtered by type and group. Features with a null group are displayed if
+ * their feature type is visible. Non-positional features may optionally be
+ * included (with no check on type or group).
*
* @param sequences
* @param fr
* @param includeNonPositional
- * if true, include non-positional features
- * (regardless of group or type)
+ * if true, include non-positional features (regardless of group or
+ * type)
* @param includeComplement
- * if true, include visible complementary
- * (CDS/protein) positional features, with
- * locations converted to local sequence
- * coordinates
+ * if true, include visible complementary (CDS/protein) positional
+ * features, with locations converted to local sequence coordinates
* @return
*/
public String printJalviewFormat(SequenceI[] sequences,
.entrySet())
{
FeatureColourI colour = featureColour.getValue();
- out.append(colour.toJalviewFormat(featureColour.getKey())).append(
- newline);
+ out.append(colour.toJalviewFormat(featureColour.getKey()))
+ .append(newline);
}
}
/**
* Outputs any visible complementary (CDS/peptide) positional features as
- * Jalview format, within feature group. The coordinates of the linked features
- * are converted to the corresponding positions of the local sequences.
+ * Jalview format, within feature group. The coordinates of the linked
+ * features are converted to the corresponding positions of the local
+ * sequences.
*
* @param out
* @param fr
/*
* output features by group
*/
- for (Entry<String, Map<String, List<SequenceFeature>>> groupFeatures : map.entrySet())
+ for (Entry<String, Map<String, List<SequenceFeature>>> groupFeatures : map
+ .entrySet())
{
out.append(newline);
String group = groupFeatures.getKey();
if (mf != null)
{
- MapList mapping = mf.mapping.getMap();
for (SequenceFeature sf : mf.features)
{
/*
found.add(sf);
int begin = sf.getBegin();
int end = sf.getEnd();
- int[] range = mf.mapping.getTo() == seq.getDatasetSequence()
- ? mapping.locateInTo(begin, end)
- : mapping.locateInFrom(begin, end);
+ int[] range = mf.getMappedPositions(begin, end);
SequenceFeature sf2 = new SequenceFeature(sf, range[0],
range[1], group, sf.getScore());
complementary.add(sf2);
}
/**
- * Outputs any feature filters defined for visible feature types, sandwiched by
- * STARTFILTERS and ENDFILTERS lines
+ * Outputs any feature filters defined for visible feature types, sandwiched
+ * by STARTFILTERS and ENDFILTERS lines
*
* @param out
* @param visible
* @param includeNonPositional
* @return
*/
- private int outputFeaturesByGroup(StringBuilder out,
- FeatureRenderer fr, String[] featureTypes,
- SequenceI[] sequences, boolean includeNonPositional)
+ private int outputFeaturesByGroup(StringBuilder out, FeatureRenderer fr,
+ String[] featureTypes, SequenceI[] sequences,
+ boolean includeNonPositional)
{
List<String> featureGroups = fr.getFeatureGroups();
if (featureTypes.length > 0
&& (isNullGroup || visibleGroups.contains(group)))
{
- features.addAll(sequences[i].getFeatures().getFeaturesForGroup(
- true, group, featureTypes));
+ features.addAll(sequences[i].getFeatures()
+ .getFeaturesForGroup(true, group, featureTypes));
}
for (SequenceFeature sf : features)
* @param sequenceName
* @param sequenceFeature
*/
- protected void formatJalviewFeature(
- StringBuilder out, String sequenceName,
- SequenceFeature sequenceFeature)
+ protected void formatJalviewFeature(StringBuilder out,
+ String sequenceName, SequenceFeature sequenceFeature)
{
if (sequenceFeature.description == null
|| sequenceFeature.description.equals(""))
@Override
public String print(SequenceI[] sqs, boolean jvsuffix)
{
// This override produces no output of its own: it ignores both arguments,
// logs a hint naming the two methods to call instead, and returns null.
- System.out.println("Use printGffFormat() or printJalviewFormat()");
+ jalview.bin.Console
+ .outPrintln("Use printGffFormat() or printJalviewFormat()");
return null;
}
* Returns features output in GFF2 format
*
* @param sequences
- * the sequences whose features are to be
- * output
+ * the sequences whose features are to be output
* @param visible
- * a map whose keys are the type names of
- * visible features
+ * a map whose keys are the type names of visible features
* @param visibleFeatureGroups
* @param includeNonPositionalFeatures
* @param includeComplement
* @return
*/
- public String printGffFormat(SequenceI[] sequences,
- FeatureRenderer fr, boolean includeNonPositionalFeatures,
- boolean includeComplement)
+ public String printGffFormat(SequenceI[] sequences, FeatureRenderer fr,
+ boolean includeNonPositionalFeatures, boolean includeComplement)
{
FeatureRenderer fr2 = null;
if (includeComplement)
fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer();
}
- Map<String, FeatureColourI> visibleColours = fr.getDisplayedFeatureCols();
+ Map<String, FeatureColourI> visibleColours = fr
+ .getDisplayedFeatureCols();
StringBuilder out = new StringBuilder(256);
- out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion));
+ out.append(String.format("%s %d\n", GFF_VERSION,
+ gffVersion == 0 ? 2 : gffVersion));
String[] types = visibleColours == null ? new String[0]
: visibleColours.keySet()
String phase = sf.getPhase();
out.append(phase == null ? "." : phase);
- // miscellaneous key-values (GFF column 9)
- String attributes = sf.getAttributes();
- if (attributes != null)
+ if (sf.otherDetails != null && !sf.otherDetails.isEmpty())
+ {
+ Map<String, Object> map = sf.otherDetails;
+ formatAttributes(out, map);
+ }
+ }
+
+ /**
+  * A helper method that appends a tab character followed by the attributes
+  * stored in the map as semicolon-delimited {@code key=value} pairs e.g.
+  *
+  * <pre>
+  * AC_Male=0;AF_NFE=0.00000e 00;Hom_FIN=0;GQ_MEDIAN=9
+  * </pre>
+  *
+  * Simple values are written using their {@code toString()} form, URL-encoded
+  * with the {@code GffHelperI.GFF_ENCODABLE} character set. A map-valued
+  * attribute is delegated to {@code formatMapAttribute}, for example
+  *
+  * <pre>
+  * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+  * </pre>
+  *
+  * The {@code jvmap_} prefix designates a values map and is removed if the
+  * value is parsed when read in. (The GFF3 specification allows
+  * 'semi-structured data' to be represented provided the attribute name begins
+  * with a lower case letter.) NOTE(review): neither the prefix nor the braces
+  * shown above are added by this method - confirm where they are applied.
+  * <p>
+  * The following entries produce no output, and no separator:
+  * <ul>
+  * <li>the strand and phase keys, which are written to their own columns</li>
+  * <li>entries whose value is null</li>
+  * <li>entries whose value is an empty map</li>
+  * </ul>
+  * The leading tab is always written, even if no attribute follows it.
+  *
+  * @param sb
+  *          the buffer to append to
+  * @param map
+  *          attribute names and values; must not be null
+  * @see http://gmod.org/wiki/GFF3#GFF3_Format
+  */
+ void formatAttributes(StringBuilder sb, Map<String, Object> map)
+ {
+   sb.append(TAB);
+   boolean first = true;
+   for (Entry<String, Object> attribute : map.entrySet())
+   {
+     String key = attribute.getKey();
+     if (SequenceFeature.STRAND.equals(key)
+             || SequenceFeature.PHASE.equals(key))
+     {
+       /*
+        * values stashed in map but output to their own columns
+        */
+       continue;
+     }
+
+     Object value = attribute.getValue();
+     boolean isMap = value instanceof Map<?, ?>;
+     if (value == null || (isMap && ((Map<?, ?>) value).isEmpty()))
+     {
+       /*
+        * nothing would be written for this entry, so skip it before
+        * emitting a separator (avoids a stray or doubled ';')
+        */
+       continue;
+     }
+
+     if (!first)
+     {
+       sb.append(";");
+     }
+     first = false;
+
+     if (isMap)
+     {
+       formatMapAttribute(sb, key, (Map<?, ?>) value);
+     }
+     else
+     {
+       String formatted = StringUtils.urlEncode(value.toString(),
+               GffHelperI.GFF_ENCODABLE);
+       sb.append(key).append(EQUALS).append(formatted);
+     }
+   }
+ }
+
+ /**
+ * Formats the map entries as
+ *
+ * <pre>
+ * key=key1=value1,key2=value2,...
+ * </pre>
+ *
+ * and appends this to the string buffer. Nothing at all is appended (not even
+ * the attribute key) if the map is null or empty. Entry keys are written as
+ * returned by their toString(); only entry values are URL-encoded, using the
+ * GffHelperI.GFF_ENCODABLE character set.
+ * <p>
+ * NOTE(review): a null entry key or entry value would cause a
+ * NullPointerException here - confirm callers never store nulls.
+ *
+ * @param sb
+ *          the buffer to append to
+ * @param key
+ *          the attribute name written before the first '='
+ * @param map
+ *          the sub-attributes to write; may be null or empty
+ */
+ private void formatMapAttribute(StringBuilder sb, String key,
+ Map<?, ?> map)
+ {
+ if (map == null || map.isEmpty())
{
- out.append(TAB).append(attributes);
+ return;
+ }
+
+ /*
+ * AbstractMap.toString would be a shortcut here, but more reliable
+ * to code the required format in case toString changes in future;
+ * entries are comma-separated, in the map's own iteration order
+ */
+ sb.append(key).append(EQUALS);
+ boolean first = true;
+ for (Entry<?, ?> entry : map.entrySet())
+ {
+ if (!first)
+ {
+ sb.append(",");
+ }
+ first = false;
+ sb.append(entry.getKey().toString()).append(EQUALS);
+ String formatted = StringUtils.urlEncode(entry.getValue().toString(),
+ GffHelperI.GFF_ENCODABLE);
+ sb.append(formatted);
}
}
*/
if (gffColumns.length < 5)
{
- System.err.println("Ignoring GFF feature line with too few columns ("
- + gffColumns.length + ")");
+ jalview.bin.Console
+ .errPrintln("Ignoring GFF feature line with too few columns ("
+ + gffColumns.length + ")");
return null;
}
}
} catch (IOException e)
{
- System.err.println("GFF parsing failed with: " + e.getMessage());
+ jalview.bin.Console
+ .errPrintln("GFF parsing failed with: " + e.getMessage());
return null;
}
}
}
/**
- * Process the 'column 9' data of the GFF file. This is less formally defined,
- * and its interpretation will vary depending on the tool that has generated
- * it.
- *
- * @param attributes
- * @param sf
- */
- protected void processGffColumnNine(String attributes, SequenceFeature sf)
- {
- sf.setAttributes(attributes);
-
- /*
- * Parse attributes in column 9 and add them to the sequence feature's
- * 'otherData' table; use Note as a best proxy for description
- */
- char nameValueSeparator = gffVersion == 3 ? '=' : ' ';
- // TODO check we don't break GFF2 values which include commas here
- Map<String, List<String>> nameValues = GffHelperBase
- .parseNameValuePairs(attributes, ";", nameValueSeparator, ",");
- for (Entry<String, List<String>> attr : nameValues.entrySet())
- {
- String values = StringUtils.listToDelimitedString(attr.getValue(),
- "; ");
- sf.setValue(attr.getKey(), values);
- if (NOTE.equals(attr.getKey()))
- {
- sf.setDescription(values);
- }
- }
- }
-
- /**
* After encountering ##fasta in a GFF3 file, process the remainder of the
* file as FASTA sequence data. Any placeholder sequences created during
* feature parsing are updated with the actual sequences.
} catch (IOException q)
{
}
- FastaFile parser = new FastaFile(this);
+ // Opening a FastaFile object with the remainder of this object's dataIn.
+ // Tell the constructor to NOT close the dataIn when finished.
+ FastaFile parser = new FastaFile(this, false);
List<SequenceI> includedseqs = parser.getSeqs();
SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs);
List<SequenceFeature> sfs = seq.getFeatures().getPositionalFeatures();
if (!sfs.isEmpty())
{
- String newName = (String) sfs.get(0).getValue(
- GffHelperI.RENAME_TOKEN);
+ String newName = (String) sfs.get(0)
+ .getValue(GffHelperI.RENAME_TOKEN);
if (newName != null)
{
seq.setName(newName);
}
else
{
- System.err.println("Ignoring unknown pragma: " + line);
+ jalview.bin.Console.errPrintln("Ignoring unknown pragma: " + line);
}
}
}