X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=745bce3ad9bc0726a0b2688918ed3e7dc8dd9836;hb=17b7d054cf7faa5ee57ad8c8c4c9daa495d8cb35;hp=f91ea866f586b0ba36b6760be77906a60abc3b09;hpb=0b573ed90b14079f7326281f50c0c9cffdace586;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index f91ea86..745bce3 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -20,6 +20,20 @@ */ package jalview.io; +import java.util.Locale; + +import java.awt.Color; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.TreeMap; + import jalview.analysis.AlignmentUtils; import jalview.analysis.SequenceIdMatcher; import jalview.api.AlignViewportI; @@ -36,7 +50,6 @@ import jalview.datamodel.SequenceI; import jalview.datamodel.features.FeatureMatcherSet; import jalview.datamodel.features.FeatureMatcherSetI; import jalview.gui.Desktop; -import jalview.io.gff.GffHelperBase; import jalview.io.gff.GffHelperFactory; import jalview.io.gff.GffHelperI; import jalview.schemes.FeatureColour; @@ -45,18 +58,6 @@ import jalview.util.MapList; import jalview.util.ParseHtmlBodyAndLinks; import jalview.util.StringUtils; -import java.awt.Color; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TreeMap; - /** * Parses and writes features files, which may be in Jalview, GFF2 or GFF3 * format. These are tab-delimited formats but with differences in the use of @@ -75,6 +76,8 @@ import java.util.TreeMap; */ public class FeaturesFile extends AlignFile implements FeaturesSourceI { + private static final String EQUALS = "="; + private static final String TAB_REGEX = "\\t"; private static final String STARTGROUP = "STARTGROUP"; @@ -87,8 +90,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED"; - private static final String NOTE = "Note"; - protected static final String GFF_VERSION = "##gff-version"; private AlignmentI lastmatchedAl = null; @@ -109,12 +110,12 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI /** * Constructor which does not parse the file immediately * - * @param file File or String filename + * @param file + * File or String filename * @param paste * @throws IOException */ - public FeaturesFile(Object file, DataSourceType paste) - throws IOException + public FeaturesFile(Object file, DataSourceType paste) throws IOException { super(false, file, paste); } @@ -235,7 +236,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI // skip comments/process pragmas if (line.length() == 0 || line.startsWith("#")) { - if (line.toLowerCase().startsWith("##")) + if (line.toLowerCase(Locale.ROOT).startsWith("##")) { processGffPragma(line, gffProps, align, newseqs); } @@ -347,7 +348,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI String line; while ((line = nextLine()) != null) { - if (line.toUpperCase().startsWith(ENDFILTERS)) + if (line.toUpperCase(Locale.ROOT).startsWith(ENDFILTERS)) { return; } @@ -567,21 +568,19 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Returns contents of a Jalview format features file, for visible features, as - * filtered by type and group. Features with a null group are displayed if their - * feature type is visible. Non-positional features may optionally be included - * (with no check on type or group). + * Returns contents of a Jalview format features file, for visible features, + * as filtered by type and group. Features with a null group are displayed if + * their feature type is visible. Non-positional features may optionally be + * included (with no check on type or group). * * @param sequences * @param fr * @param includeNonPositional - * if true, include non-positional features - * (regardless of group or type) + * if true, include non-positional features (regardless of group or + * type) * @param includeComplement - * if true, include visible complementary - * (CDS/protein) positional features, with - * locations converted to local sequence - * coordinates + * if true, include visible complementary (CDS/protein) positional + * features, with locations converted to local sequence coordinates * @return */ public String printJalviewFormat(SequenceI[] sequences, @@ -603,8 +602,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI .entrySet()) { FeatureColourI colour = featureColour.getValue(); - out.append(colour.toJalviewFormat(featureColour.getKey())).append( - newline); + out.append(colour.toJalviewFormat(featureColour.getKey())) + .append(newline); } } @@ -633,8 +632,9 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI /** * Outputs any visible complementary (CDS/peptide) positional features as - * Jalview format, within feature group. The coordinates of the linked features - * are converted to the corresponding positions of the local sequences. + * Jalview format, within feature group. The coordinates of the linked + * features are converted to the corresponding positions of the local + * sequences. * * @param out * @param fr @@ -685,7 +685,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI /* * output features by group */ - for (Entry>> groupFeatures : map.entrySet()) + for (Entry>> groupFeatures : map + .entrySet()) { out.append(newline); String group = groupFeatures.getKey(); @@ -737,7 +738,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI if (mf != null) { - MapList mapping = mf.mapping.getMap(); for (SequenceFeature sf : mf.features) { /* @@ -753,9 +753,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI found.add(sf); int begin = sf.getBegin(); int end = sf.getEnd(); - int[] range = mf.mapping.getTo() == seq.getDatasetSequence() - ? mapping.locateInTo(begin, end) - : mapping.locateInFrom(begin, end); + int[] range = mf.getMappedPositions(begin, end); SequenceFeature sf2 = new SequenceFeature(sf, range[0], range[1], group, sf.getScore()); complementary.add(sf2); @@ -768,8 +766,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Outputs any feature filters defined for visible feature types, sandwiched by - * STARTFILTERS and ENDFILTERS lines + * Outputs any feature filters defined for visible feature types, sandwiched + * by STARTFILTERS and ENDFILTERS lines * * @param out * @param visible @@ -819,9 +817,9 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * @param includeNonPositional * @return */ - private int outputFeaturesByGroup(StringBuilder out, - FeatureRenderer fr, String[] featureTypes, - SequenceI[] sequences, boolean includeNonPositional) + private int outputFeaturesByGroup(StringBuilder out, FeatureRenderer fr, + String[] featureTypes, SequenceI[] sequences, + boolean includeNonPositional) { List featureGroups = fr.getFeatureGroups(); @@ -870,8 +868,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI if (featureTypes.length > 0 && (isNullGroup || visibleGroups.contains(group))) { - features.addAll(sequences[i].getFeatures().getFeaturesForGroup( - true, group, featureTypes)); + features.addAll(sequences[i].getFeatures() + .getFeaturesForGroup(true, group, featureTypes)); } for (SequenceFeature sf : features) @@ -909,9 +907,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * @param sequenceName * @param sequenceFeature */ - protected void formatJalviewFeature( - StringBuilder out, String sequenceName, - SequenceFeature sequenceFeature) + protected void formatJalviewFeature(StringBuilder out, + String sequenceName, SequenceFeature sequenceFeature) { if (sequenceFeature.description == null || sequenceFeature.description.equals("")) @@ -1021,19 +1018,16 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * Returns features output in GFF2 format * * @param sequences - * the sequences whose features are to be - * output + * the sequences whose features are to be output * @param visible - * a map whose keys are the type names of - * visible features + * a map whose keys are the type names of visible features * @param visibleFeatureGroups * @param includeNonPositionalFeatures * @param includeComplement * @return */ - public String printGffFormat(SequenceI[] sequences, - FeatureRenderer fr, boolean includeNonPositionalFeatures, - boolean includeComplement) + public String printGffFormat(SequenceI[] sequences, FeatureRenderer fr, + boolean includeNonPositionalFeatures, boolean includeComplement) { FeatureRenderer fr2 = null; if (includeComplement) @@ -1042,11 +1036,13 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer(); } - Map visibleColours = fr.getDisplayedFeatureCols(); + Map visibleColours = fr + .getDisplayedFeatureCols(); StringBuilder out = new StringBuilder(256); - out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion)); + out.append(String.format("%s %d\n", GFF_VERSION, + gffVersion == 0 ? 2 : gffVersion)); String[] types = visibleColours == null ? new String[0] : visibleColours.keySet() @@ -1126,11 +1122,110 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI String phase = sf.getPhase(); out.append(phase == null ? "." : phase); - // miscellaneous key-values (GFF column 9) - String attributes = sf.getAttributes(); - if (attributes != null) + if (sf.otherDetails != null && !sf.otherDetails.isEmpty()) + { + Map map = sf.otherDetails; + formatAttributes(out, map); + } + } + + /** + * A helper method that outputs attributes stored in the map as + * semicolon-delimited values e.g. + * + *
+   * AC_Male=0;AF_NFE=0.00000e 00;Hom_FIN=0;GQ_MEDIAN=9
+   * 
+ * + * A map-valued attribute is formatted as a comma-delimited list within + * braces, for example + * + *
+   * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+   * 
+ * + * The {@code jvmap_} prefix designates a values map and is removed if the + * value is parsed when read in. (The GFF3 specification allows + * 'semi-structured data' to be represented provided the attribute name begins + * with a lower case letter.) + * + * @param sb + * @param map + * @see http://gmod.org/wiki/GFF3#GFF3_Format + */ + void formatAttributes(StringBuilder sb, Map map) + { + sb.append(TAB); + boolean first = true; + for (String key : map.keySet()) + { + if (SequenceFeature.STRAND.equals(key) + || SequenceFeature.PHASE.equals(key)) + { + /* + * values stashed in map but output to their own columns + */ + continue; + } + { + if (!first) + { + sb.append(";"); + } + } + first = false; + Object value = map.get(key); + if (value instanceof Map) + { + formatMapAttribute(sb, key, (Map) value); + } + else + { + String formatted = StringUtils.urlEncode(value.toString(), + GffHelperI.GFF_ENCODABLE); + sb.append(key).append(EQUALS).append(formatted); + } + } + } + + /** + * Formats the map entries as + * + *
+   * key=key1=value1,key2=value2,...
+   * 
+ * + * and appends this to the string buffer + * + * @param sb + * @param key + * @param map + */ + private void formatMapAttribute(StringBuilder sb, String key, + Map map) + { + if (map == null || map.isEmpty()) + { + return; + } + + /* + * AbstractMap.toString would be a shortcut here, but more reliable + * to code the required format in case toString changes in future + */ + sb.append(key).append(EQUALS); + boolean first = true; + for (Entry entry : map.entrySet()) { - out.append(TAB).append(attributes); + if (!first) + { + sb.append(","); + } + first = false; + sb.append(entry.getKey().toString()).append(EQUALS); + String formatted = StringUtils.urlEncode(entry.getValue().toString(), + GffHelperI.GFF_ENCODABLE); + sb.append(formatted); } } @@ -1279,38 +1374,6 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Process the 'column 9' data of the GFF file. This is less formally defined, - * and its interpretation will vary depending on the tool that has generated - * it. - * - * @param attributes - * @param sf - */ - protected void processGffColumnNine(String attributes, SequenceFeature sf) - { - sf.setAttributes(attributes); - - /* - * Parse attributes in column 9 and add them to the sequence feature's - * 'otherData' table; use Note as a best proxy for description - */ - char nameValueSeparator = gffVersion == 3 ? '=' : ' '; - // TODO check we don't break GFF2 values which include commas here - Map> nameValues = GffHelperBase - .parseNameValuePairs(attributes, ";", nameValueSeparator, ","); - for (Entry> attr : nameValues.entrySet()) - { - String values = StringUtils.listToDelimitedString(attr.getValue(), - "; "); - sf.setValue(attr.getKey(), values); - if (NOTE.equals(attr.getKey())) - { - sf.setDescription(values); - } - } - } - - /** * After encountering ##fasta in a GFF3 file, process the remainder of the * file as FAST sequence data. Any placeholder sequences created during * feature parsing are updated with the actual sequences. @@ -1328,7 +1391,9 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } catch (IOException q) { } - FastaFile parser = new FastaFile(this); + // Opening a FastaFile object with the remainder of this object's dataIn. + // Tell the constructor to NOT close the dataIn when finished. + FastaFile parser = new FastaFile(this, false); List includedseqs = parser.getSeqs(); SequenceIdMatcher smatcher = new SequenceIdMatcher(newseqs); @@ -1391,8 +1456,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI List sfs = seq.getFeatures().getPositionalFeatures(); if (!sfs.isEmpty()) { - String newName = (String) sfs.get(0).getValue( - GffHelperI.RENAME_TOKEN); + String newName = (String) sfs.get(0) + .getValue(GffHelperI.RENAME_TOKEN); if (newName != null) { seq.setName(newName);