X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=821384a600dc2176a332ef00e5bbd18452fc5d27;hb=6200addf078b7f7ace90597dc056dafc7fc602c1;hp=aa21b0f653f870544c337266c37e8fe8f1fe5fe1;hpb=4b1c969e87feaefd4fb9c49ba3d6b828b3ce1a9c;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index aa21b0f..821384a 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -29,12 +29,13 @@ import jalview.api.FeaturesSourceI; import jalview.datamodel.AlignedCodonFrame; import jalview.datamodel.Alignment; import jalview.datamodel.AlignmentI; +import jalview.datamodel.MappedFeatures; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; import jalview.datamodel.features.FeatureMatcherSet; import jalview.datamodel.features.FeatureMatcherSetI; -import jalview.io.gff.GffHelperBase; +import jalview.gui.Desktop; import jalview.io.gff.GffHelperFactory; import jalview.io.gff.GffHelperI; import jalview.schemes.FeatureColour; @@ -49,9 +50,11 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.TreeMap; /** * Parses and writes features files, which may be in Jalview, GFF2 or GFF3 @@ -71,6 +74,8 @@ import java.util.Map.Entry; */ public class FeaturesFile extends AlignFile implements FeaturesSourceI { + private static final String EQUALS = "="; + private static final String TAB_REGEX = "\\t"; private static final String STARTGROUP = "STARTGROUP"; @@ -441,7 +446,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI float score = Float.NaN; try { - score = new Float(gffColumns[6]).floatValue(); + score = Float.valueOf(gffColumns[6]).floatValue(); } catch (NumberFormatException ex) { sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup); @@ -563,31 +568,38 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Returns contents of a Jalview format features file, for visible features, - * as filtered by type and group. Features with a null group are displayed if - * their feature type is visible. Non-positional features may optionally be - * included (with no check on type or group). + * Returns contents of a Jalview format features file, for visible features, as + * filtered by type and group. Features with a null group are displayed if their + * feature type is visible. Non-positional features may optionally be included + * (with no check on type or group). * * @param sequences * @param fr * @param includeNonPositional - * if true, include non-positional features (regardless of group or - * type) + * if true, include non-positional features + * (regardless of group or type) + * @param includeComplement + * if true, include visible complementary + * (CDS/protein) positional features, with + * locations converted to local sequence + * coordinates * @return */ public String printJalviewFormat(SequenceI[] sequences, - FeatureRenderer fr, boolean includeNonPositional) + FeatureRenderer fr, boolean includeNonPositional, + boolean includeComplement) { Map visibleColours = fr .getDisplayedFeatureCols(); Map featureFilters = fr.getFeatureFilters(); - if (!includeNonPositional - && (visibleColours == null || visibleColours.isEmpty())) - { - // no point continuing. - return "No Features Visible"; - } + // BH check this is out? +// if (!includeNonPositional +// && (visibleColours == null || visibleColours.isEmpty())) +// { +// // no point continuing. +// return "No Features Visible"; +// } /* * write out feature colours (if we know them) @@ -620,10 +632,151 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI int count = outputFeaturesByGroup(out, fr, types, sequences, includeNonPositional); + if (includeComplement) + { + count += outputComplementFeatures(out, fr, sequences); + } + return count > 0 ? out.toString() : "No Features Visible"; } /** + * Outputs any visible complementary (CDS/peptide) positional features as + * Jalview format, within feature group. The coordinates of the linked features + * are converted to the corresponding positions of the local sequences. + * + * @param out + * @param fr + * @param sequences + * @return + */ + private int outputComplementFeatures(StringBuilder out, + FeatureRenderer fr, SequenceI[] sequences) + { + AlignViewportI comp = fr.getViewport().getCodingComplement(); + FeatureRenderer fr2 = Desktop.getAlignFrameFor(comp) + .getFeatureRenderer(); + + /* + * bin features by feature group and sequence + */ + Map>> map = new TreeMap<>( + String.CASE_INSENSITIVE_ORDER); + int count = 0; + + for (SequenceI seq : sequences) + { + /* + * find complementary features + */ + List complementary = findComplementaryFeatures(seq, + fr2); + String seqName = seq.getName(); + + for (SequenceFeature sf : complementary) + { + String group = sf.getFeatureGroup(); + if (!map.containsKey(group)) + { + map.put(group, new LinkedHashMap<>()); // preserves sequence order + } + Map> groupFeatures = map.get(group); + if (!groupFeatures.containsKey(seqName)) + { + groupFeatures.put(seqName, new ArrayList<>()); + } + List foundFeatures = groupFeatures.get(seqName); + foundFeatures.add(sf); + count++; + } + } + + /* + * output features by group + */ + for (Entry>> groupFeatures : map.entrySet()) + { + out.append(newline); + String group = groupFeatures.getKey(); + if (!"".equals(group)) + { + out.append(STARTGROUP).append(TAB).append(group).append(newline); + } + Map> seqFeaturesMap = groupFeatures + .getValue(); + for (Entry> seqFeatures : seqFeaturesMap + .entrySet()) + { + String sequenceName = seqFeatures.getKey(); + for (SequenceFeature sf : seqFeatures.getValue()) + { + formatJalviewFeature(out, sequenceName, sf); + } + } + if (!"".equals(group)) + { + out.append(ENDGROUP).append(TAB).append(group).append(newline); + } + } + + return count; + } + + /** + * Answers a list of mapped features visible in the (CDS/protein) complement, + * with feature positions translated to local sequence coordinates + * + * @param seq + * @param fr2 + * @return + */ + protected List findComplementaryFeatures(SequenceI seq, + FeatureRenderer fr2) + { + /* + * avoid duplication of features (e.g. peptide feature + * at all 3 mapped codon positions) + */ + List found = new ArrayList<>(); + List complementary = new ArrayList<>(); + + for (int pos = seq.getStart(); pos <= seq.getEnd(); pos++) + { + MappedFeatures mf = fr2.findComplementFeaturesAtResidue(seq, pos); + + if (mf != null) + { + MapList mapping = mf.mapping.getMap(); + for (SequenceFeature sf : mf.features) + { + /* + * make a virtual feature with local coordinates + */ + if (!found.contains(sf)) + { + String group = sf.getFeatureGroup(); + if (group == null) + { + group = ""; + } + found.add(sf); + int begin = sf.getBegin(); + int end = sf.getEnd(); + int[] range = mf.mapping.getTo() == seq.getDatasetSequence() + ? mapping.locateInTo(begin, end) + : mapping.locateInFrom(begin, end); + SequenceFeature sf2 = new SequenceFeature(sf, range[0], + range[1], group, sf.getScore()); + complementary.add(sf2); + } + } + } + } + + return complementary; + } + + /** * Outputs any feature filters defined for visible feature types, sandwiched by * STARTFILTERS and ENDFILTERS lines * @@ -745,7 +898,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } } firstInGroup = false; - out.append(formatJalviewFeature(sequenceName, sf)); + formatJalviewFeature(out, sequenceName, sf); } } } @@ -759,14 +912,16 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** + * Formats one feature in Jalview format and appends to the string buffer + * * @param out * @param sequenceName * @param sequenceFeature */ protected String formatJalviewFeature( - String sequenceName, SequenceFeature sequenceFeature) + StringBuilder out, String sequenceName, + SequenceFeature sequenceFeature) { - StringBuilder out = new StringBuilder(64); if (sequenceFeature.description == null || sequenceFeature.description.equals("")) { @@ -791,7 +946,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI if (sequenceFeature.description.indexOf(href) == -1) { - out.append(" " + label + ""); + out.append(" ") + .append(label).append(""); } } @@ -882,23 +1038,25 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * a map whose keys are the type names of visible features * @param visibleFeatureGroups * @param includeNonPositionalFeatures + * @param includeComplement * @return */ public String printGffFormat(SequenceI[] sequences, - FeatureRenderer fr, boolean includeNonPositionalFeatures) + FeatureRenderer fr, boolean includeNonPositionalFeatures, + boolean includeComplement) + { + FeatureRenderer fr2 = null; + if (includeComplement) { + AlignViewportI comp = fr.getViewport().getCodingComplement(); + fr2 = Desktop.getAlignFrameFor(comp).getFeatureRenderer(); + } + Map visibleColours = fr.getDisplayedFeatureCols(); StringBuilder out = new StringBuilder(256); - out.append(String.format("%s %d" + newline, GFF_VERSION, - gffVersion == 0 ? 2 : gffVersion)); - - if (!includeNonPositionalFeatures - && (visibleColours == null || visibleColours.isEmpty())) - { - return out.toString(); - } + out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion)); String[] types = visibleColours == null ? new String[0] : visibleColours.keySet() @@ -906,6 +1064,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI for (SequenceI seq : sequences) { + List seqFeatures = new ArrayList<>(); List features = new ArrayList<>(); if (includeNonPositionalFeatures) { @@ -918,15 +1077,40 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI for (SequenceFeature sf : features) { - if (!sf.isNonPositional() && !fr.isVisible(sf)) + if (sf.isNonPositional() || fr.isVisible(sf)) { /* - * feature hidden by group visibility, colour threshold, + * drop features hidden by group visibility, colour threshold, * or feature filter condition */ - continue; + seqFeatures.add(sf); } + } + + if (includeComplement) + { + seqFeatures.addAll(findComplementaryFeatures(seq, fr2)); + } + + /* + * sort features here if wanted + */ + for (SequenceFeature sf : seqFeatures) + { + formatGffFeature(out, seq, sf); + out.append(newline); + } + } + return out.toString(); + } + + /** + * Formats one feature as GFF and appends to the string buffer + */ + private void formatGffFeature(StringBuilder out, SequenceI seq, + SequenceFeature sf) + { String source = sf.featureGroup; if (source == null) { @@ -953,18 +1137,111 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI String phase = sf.getPhase(); out.append(phase == null ? "." : phase); - // miscellaneous key-values (GFF column 9) - String attributes = sf.getAttributes(); - if (attributes != null) + if (sf.otherDetails != null && !sf.otherDetails.isEmpty()) { - out.append(TAB).append(attributes); - } + Map map = sf.otherDetails; + formatAttributes(out, map); + } + } - out.append(newline); + /** + * A helper method that outputs attributes stored in the map as + * semicolon-delimited values e.g. + * + *
+   * AC_Male=0;AF_NFE=0.00000e 00;Hom_FIN=0;GQ_MEDIAN=9
+   * 
+ * + * A map-valued attribute is formatted as a comma-delimited list within braces, + * for example + * + *
+   * jvmap_CSQ={ALLELE_NUM=1,UNIPARC=UPI0002841053,Feature=ENST00000585561}
+   * 
+ * + * The {@code jvmap_} prefix designates a values map and is removed if the value + * is parsed when read in. (The GFF3 specification allows 'semi-structured data' + * to be represented provided the attribute name begins with a lower case + * letter.) + * + * @param sb + * @param map + * @see http://gmod.org/wiki/GFF3#GFF3_Format + */ + void formatAttributes(StringBuilder sb, Map map) + { + sb.append(TAB); + boolean first = true; + for (String key : map.keySet()) + { + if (SequenceFeature.STRAND.equals(key) + || SequenceFeature.PHASE.equals(key)) + { + /* + * values stashed in map but output to their own columns + */ + continue; + } + { + if (!first) + { + sb.append(";"); + } + } + first = false; + Object value = map.get(key); + if (value instanceof Map) + { + formatMapAttribute(sb, key, (Map) value); + } + else + { + String formatted = StringUtils.urlEncode(value.toString(), + GffHelperI.GFF_ENCODABLE); + sb.append(key).append(EQUALS).append(formatted); } } + } - return out.toString(); + /** + * Formats the map entries as + * + *
+   * key=key1=value1,key2=value2,...
+   * 
+ * + * and appends this to the string buffer + * + * @param sb + * @param key + * @param map + */ + private void formatMapAttribute(StringBuilder sb, String key, + Map map) + { + if (map == null || map.isEmpty()) + { + return; + } + + /* + * AbstractMap.toString would be a shortcut here, but more reliable + * to code the required format in case toString changes in future + */ + sb.append(key).append(EQUALS); + boolean first = true; + for (Entry entry : map.entrySet()) + { + if (!first) + { + sb.append(","); + } + first = false; + sb.append(entry.getKey().toString()).append(EQUALS); + String formatted = StringUtils.urlEncode(entry.getValue().toString(), + GffHelperI.GFF_ENCODABLE); + sb.append(formatted); + } } /** @@ -1111,37 +1388,38 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI return seq; } - /** - * Process the 'column 9' data of the GFF file. This is less formally defined, - * and its interpretation will vary depending on the tool that has generated - * it. - * - * @param attributes - * @param sf - */ - protected void processGffColumnNine(String attributes, SequenceFeature sf) - { - sf.setAttributes(attributes); - - /* - * Parse attributes in column 9 and add them to the sequence feature's - * 'otherData' table; use Note as a best proxy for description - */ - char nameValueSeparator = gffVersion == 3 ? '=' : ' '; - // TODO check we don't break GFF2 values which include commas here - Map> nameValues = GffHelperBase - .parseNameValuePairs(attributes, ";", nameValueSeparator, ","); - for (Entry> attr : nameValues.entrySet()) - { - String values = StringUtils.listToDelimitedString(attr.getValue(), - "; "); - sf.setValue(attr.getKey(), values); - if (NOTE.equals(attr.getKey())) - { - sf.setDescription(values); - } - } - } + // BH! check that we did not lose something here. +// /** +// * Process the 'column 9' data of the GFF file. This is less formally defined, +// * and its interpretation will vary depending on the tool that has generated +// * it. +// * +// * @param attributes +// * @param sf +// */ +// protected void processGffColumnNine(String attributes, SequenceFeature sf) +// { +// sf.setAttributes(attributes); +// +// /* +// * Parse attributes in column 9 and add them to the sequence feature's +// * 'otherData' table; use Note as a best proxy for description +// */ +// char nameValueSeparator = gffVersion == 3 ? '=' : ' '; +// // TODO check we don't break GFF2 values which include commas here +// Map> nameValues = GffHelperBase +// .parseNameValuePairs(attributes, ";", nameValueSeparator, ","); +// for (Entry> attr : nameValues.entrySet()) +// { +// String values = StringUtils.listToDelimitedString(attr.getValue(), +// "; "); +// sf.setValue(attr.getKey(), values); +// if (NOTE.equals(attr.getKey())) +// { +// sf.setDescription(values); +// } +// } +// } /** * After encountering ##fasta in a GFF3 file, process the remainder of the