X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fio%2FFeaturesFile.java;h=169da5a5ae8624885d772814aa049ede2be2e4ac;hb=refs%2Fheads%2Fbug%2FJAL-3049colourCellTooltip;hp=869b18bc01721aa00e591731ef71c619128820fe;hpb=1ec8fc3e8bc982115e5ba1b16a101d239a2df591;p=jalview.git diff --git a/src/jalview/io/FeaturesFile.java b/src/jalview/io/FeaturesFile.java index 869b18b..169da5a 100755 --- a/src/jalview/io/FeaturesFile.java +++ b/src/jalview/io/FeaturesFile.java @@ -31,6 +31,8 @@ import jalview.datamodel.AlignmentI; import jalview.datamodel.SequenceDummy; import jalview.datamodel.SequenceFeature; import jalview.datamodel.SequenceI; +import jalview.datamodel.features.FeatureMatcherSet; +import jalview.datamodel.features.FeatureMatcherSetI; import jalview.io.gff.GffHelperBase; import jalview.io.gff.GffHelperFactory; import jalview.io.gff.GffHelperI; @@ -45,13 +47,10 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; -import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Set; /** * Parses and writes features files, which may be in Jalview, GFF2 or GFF3 @@ -71,27 +70,22 @@ import java.util.Set; */ public class FeaturesFile extends AlignFile implements FeaturesSourceI { + private static final String TAB_REGEX = "\\t"; + + private static final String STARTGROUP = "STARTGROUP"; + + private static final String ENDGROUP = "ENDGROUP"; + + private static final String STARTFILTERS = "STARTFILTERS"; + + private static final String ENDFILTERS = "ENDFILTERS"; + private static final String ID_NOT_SPECIFIED = "ID_NOT_SPECIFIED"; private static final String NOTE = "Note"; - protected static final String TAB = "\t"; - protected static final String GFF_VERSION = "##gff-version"; - private static final Comparator SORT_NULL_LAST = new Comparator() - { - @Override - public int compare(String o1, String o2) - { - if (o1 == null) - { - return o2 == null ? 0 : 1; - } - return (o2 == null ? -1 : o1.compareTo(o2)); - } - }; - private AlignmentI lastmatchedAl = null; private SequenceIdMatcher matcher = null; @@ -110,14 +104,14 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI /** * Constructor which does not parse the file immediately * - * @param inFile + * @param file * @param paste * @throws IOException */ - public FeaturesFile(String inFile, DataSourceType paste) + public FeaturesFile(String file, DataSourceType paste) throws IOException { - super(false, inFile, paste); + super(false, file, paste); } /** @@ -133,15 +127,14 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * Constructor that optionally parses the file immediately * * @param parseImmediately - * @param inFile + * @param file * @param type * @throws IOException */ - public FeaturesFile(boolean parseImmediately, String inFile, - DataSourceType type) - throws IOException + public FeaturesFile(boolean parseImmediately, String file, + DataSourceType type) throws IOException { - super(parseImmediately, inFile, type); + super(parseImmediately, file, type); } /** @@ -186,7 +179,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * @param align * - alignment/dataset containing sequences that are to be annotated * @param colours - * - hashtable to store feature colour definitions + * - map to store feature colour definitions * @param removeHTML * - process html strings into plain text * @param relaxedIdmatching @@ -197,11 +190,34 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI Map colours, boolean removeHTML, boolean relaxedIdmatching) { - Map gffProps = new HashMap(); + return parse(align, colours, null, removeHTML, relaxedIdmatching); + } + + /** + * Parse GFF or Jalview format sequence features file + * + * @param align + * - alignment/dataset containing sequences that are to be annotated + * @param colours + * - map to store feature colour definitions + * @param filters + * - map to store feature filter definitions + * @param removeHTML + * - process html strings into plain text + * @param relaxedIdmatching + * - when true, ID matches to compound sequence IDs are allowed + * @return true if features were added + */ + public boolean parse(AlignmentI align, + Map colours, + Map filters, boolean removeHTML, + boolean relaxedIdmatching) + { + Map gffProps = new HashMap<>(); /* * keep track of any sequences we try to create from the data */ - List newseqs = new ArrayList(); + List newseqs = new ArrayList<>(); String line = null; try @@ -221,7 +237,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI continue; } - gffColumns = line.split("\\t"); // tab as regex + gffColumns = line.split(TAB_REGEX); if (gffColumns.length == 1) { if (line.trim().equalsIgnoreCase("GFF")) @@ -235,18 +251,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } } - if (gffColumns.length > 1 && gffColumns.length < 4) + if (gffColumns.length > 0 && gffColumns.length < 4) { /* * if 2 or 3 tokens, we anticipate either 'startgroup', 'endgroup' or * a feature type colour specification */ String ft = gffColumns[0]; - if (ft.equalsIgnoreCase("startgroup")) + if (ft.equalsIgnoreCase(STARTFILTERS)) + { + parseFilters(filters); + continue; + } + if (ft.equalsIgnoreCase(STARTGROUP)) { featureGroup = gffColumns[1]; } - else if (ft.equalsIgnoreCase("endgroup")) + else if (ft.equalsIgnoreCase(ENDGROUP)) { // We should check whether this is the current group, // but at present there's no way of showing more than 1 group @@ -307,6 +328,43 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** + * Reads input lines from STARTFILTERS to ENDFILTERS and adds a feature type + * filter to the map for each line parsed. After exit from this method, + * nextLine() should return the line after ENDFILTERS (or we are already at + * end of file if ENDFILTERS was missing). + * + * @param filters + * @throws IOException + */ + protected void parseFilters(Map filters) + throws IOException + { + String line; + while ((line = nextLine()) != null) + { + if (line.toUpperCase().startsWith(ENDFILTERS)) + { + return; + } + String[] tokens = line.split(TAB_REGEX); + if (tokens.length != 2) + { + System.err.println(String.format("Invalid token count %d for %d", + tokens.length, line)); + } + else + { + String featureType = tokens[0]; + FeatureMatcherSetI fm = FeatureMatcherSet.fromString(tokens[1]); + if (fm != null && filters != null) + { + filters.put(featureType, fm); + } + } + } + } + + /** * Try to parse a Jalview format feature specification and add it as a * sequence feature to any matching sequences in the alignment. Returns true * if successful (a feature was added), or false if not. @@ -321,7 +379,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI */ protected boolean parseJalviewFeature(String line, String[] gffColumns, AlignmentI alignment, Map featureColours, - boolean removeHTML, boolean relaxedIdMatching, String featureGroup) + boolean removeHTML, boolean relaxedIdMatching, + String featureGroup) { /* * tokens: description seqid seqIndex start end type [score] @@ -375,20 +434,23 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI Color colour = ColorUtils.createColourFromName(ft); featureColours.put(ft, new FeatureColour(colour)); } - SequenceFeature sf = new SequenceFeature(ft, desc, "", startPos, - endPos, featureGroup); + SequenceFeature sf = null; if (gffColumns.length > 6) { float score = Float.NaN; try { score = new Float(gffColumns[6]).floatValue(); - // update colourgradient bounds if allowed to } catch (NumberFormatException ex) { - // leave as NaN + sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup); } - sf.setScore(score); + sf = new SequenceFeature(ft, desc, startPos, endPos, score, + featureGroup); + } + else + { + sf = new SequenceFeature(ft, desc, startPos, endPos, featureGroup); } parseDescriptionHTML(sf, removeHTML); @@ -500,19 +562,26 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } /** - * Returns contents of a Jalview format features file + * Returns contents of a Jalview format features file, for visible features, as + * filtered by type and group. Features with a null group are displayed if their + * feature type is visible. Non-positional features may optionally be included + * (with no check on type or group). * * @param sequences * source of features * @param visible * map of colour for each visible feature type + * @param featureFilters + * @param visibleFeatureGroups * @param includeNonPositional * if true, include non-positional features (regardless of group or * type) * @return */ public String printJalviewFormat(SequenceI[] sequences, - Map visible, boolean includeNonPositional) + Map visible, + Map featureFilters, + List visibleFeatureGroups, boolean includeNonPositional) { if (!includeNonPositional && (visible == null || visible.isEmpty())) { @@ -535,122 +604,208 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI } } - // Work out which groups are both present and visible - Set groups = new HashSet(); String[] types = visible == null ? new String[0] : visible.keySet() .toArray(new String[visible.keySet().size()]); - for (int i = 0; i < sequences.length; i++) + /* + * feature filters if any + */ + outputFeatureFilters(out, visible, featureFilters); + + /* + * sort groups alphabetically, and ensure that features with a + * null or empty group are output after those in named groups + */ + List sortedGroups = new ArrayList<>(visibleFeatureGroups); + sortedGroups.remove(null); + sortedGroups.remove(""); + Collections.sort(sortedGroups); + sortedGroups.add(null); + sortedGroups.add(""); + + boolean foundSome = false; + + /* + * first output any non-positional features + */ + if (includeNonPositional) { - groups.addAll(sequences[i].getFeatures() - .getFeatureGroups(true, types)); - if (includeNonPositional) + for (int i = 0; i < sequences.length; i++) { - groups.addAll(sequences[i].getFeatures().getFeatureGroups(false, - types)); + String sequenceName = sequences[i].getName(); + for (SequenceFeature feature : sequences[i].getFeatures() + .getNonPositionalFeatures()) + { + foundSome = true; + out.append(formatJalviewFeature(sequenceName, feature)); + } } } /* - * sort distinct groups so null group is output last + * positional features within groups */ - List sortedGroups = new ArrayList(groups); - Collections.sort(sortedGroups, SORT_NULL_LAST); + foundSome |= outputFeaturesByGroup(out, sortedGroups, types, sequences); + + return foundSome ? out.toString() : "No Features Visible"; + } + + /** + * Outputs any feature filters defined for visible feature types, sandwiched by + * STARTFILTERS and ENDFILTERS lines + * + * @param out + * @param visible + * @param featureFilters + */ + void outputFeatureFilters(StringBuilder out, + Map visible, + Map featureFilters) + { + if (visible == null || featureFilters == null + || featureFilters.isEmpty()) + { + return; + } - // TODO check where null group should be output + boolean first = true; + for (String featureType : visible.keySet()) + { + FeatureMatcherSetI filter = featureFilters.get(featureType); + if (filter != null) + { + if (first) + { + first = false; + out.append(newline).append(STARTFILTERS).append(newline); + } + out.append(featureType).append(TAB).append(filter.toStableString()) + .append(newline); + } + } + if (!first) + { + out.append(ENDFILTERS).append(newline).append(newline); + } + + } + + /** + * Appends output of sequence features within feature groups to the output + * buffer. Groups other than the null or empty group are sandwiched by + * STARTGROUP and ENDGROUP lines. + * + * @param out + * @param groups + * @param featureTypes + * @param sequences + * @return + */ + private boolean outputFeaturesByGroup(StringBuilder out, + List groups, String[] featureTypes, SequenceI[] sequences) + { boolean foundSome = false; - for (String group : sortedGroups) + for (String group : groups) { - if (group != null) + boolean isNamedGroup = (group != null && !"".equals(group)); + if (isNamedGroup) { out.append(newline); - out.append("STARTGROUP").append(TAB); + out.append(STARTGROUP).append(TAB); out.append(group); out.append(newline); } /* - * output features within groups (non-positional first if wanted) + * output positional features within groups */ for (int i = 0; i < sequences.length; i++) { - List features = new ArrayList(); - if (includeNonPositional) + String sequenceName = sequences[i].getName(); + List features = new ArrayList<>(); + if (featureTypes.length > 0) { features.addAll(sequences[i].getFeatures().getFeaturesForGroup( - false, group, types)); - } - if (types.length > 0) - { - features.addAll(sequences[i].getFeatures().getFeaturesForGroup( - true, group, types)); + true, group, featureTypes)); } for (SequenceFeature sequenceFeature : features) { - // we have features to output foundSome = true; - if (sequenceFeature.description == null - || sequenceFeature.description.equals("")) - { - out.append(sequenceFeature.type).append(TAB); - } - else - { - if (sequenceFeature.links != null - && sequenceFeature.getDescription().indexOf("") == -1) - { - out.append(""); - } - - out.append(sequenceFeature.description); - if (sequenceFeature.links != null) - { - for (int l = 0; l < sequenceFeature.links.size(); l++) - { - String label = sequenceFeature.links.elementAt(l); - String href = label.substring(label.indexOf("|") + 1); - label = label.substring(0, label.indexOf("|")); - - if (sequenceFeature.description.indexOf(href) == -1) - { - out.append(" " + label + ""); - } - } - - if (sequenceFeature.getDescription().indexOf("") == -1) - { - out.append(""); - } - } - - out.append(TAB); - } - out.append(sequences[i].getName()); - out.append("\t-1\t"); - out.append(sequenceFeature.begin); - out.append(TAB); - out.append(sequenceFeature.end); - out.append(TAB); - out.append(sequenceFeature.type); - if (!Float.isNaN(sequenceFeature.score)) - { - out.append(TAB); - out.append(sequenceFeature.score); - } - out.append(newline); + out.append(formatJalviewFeature(sequenceName, sequenceFeature)); } } - if (group != null) + if (isNamedGroup) { - out.append("ENDGROUP").append(TAB); + out.append(ENDGROUP).append(TAB); out.append(group); out.append(newline); } } + return foundSome; + } - return foundSome ? out.toString() : "No Features Visible"; + /** + * @param out + * @param sequenceName + * @param sequenceFeature + */ + protected String formatJalviewFeature( + String sequenceName, SequenceFeature sequenceFeature) + { + StringBuilder out = new StringBuilder(64); + if (sequenceFeature.description == null + || sequenceFeature.description.equals("")) + { + out.append(sequenceFeature.type).append(TAB); + } + else + { + if (sequenceFeature.links != null + && sequenceFeature.getDescription().indexOf("") == -1) + { + out.append(""); + } + + out.append(sequenceFeature.description); + if (sequenceFeature.links != null) + { + for (int l = 0; l < sequenceFeature.links.size(); l++) + { + String label = sequenceFeature.links.elementAt(l); + String href = label.substring(label.indexOf("|") + 1); + label = label.substring(0, label.indexOf("|")); + + if (sequenceFeature.description.indexOf(href) == -1) + { + out.append(" " + label + ""); + } + } + + if (sequenceFeature.getDescription().indexOf("") == -1) + { + out.append(""); + } + } + + out.append(TAB); + } + out.append(sequenceName); + out.append("\t-1\t"); + out.append(sequenceFeature.begin); + out.append(TAB); + out.append(sequenceFeature.end); + out.append(TAB); + out.append(sequenceFeature.type); + if (!Float.isNaN(sequenceFeature.score)) + { + out.append(TAB); + out.append(sequenceFeature.score); + } + out.append(newline); + + return out.toString(); } /** @@ -677,7 +832,7 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI dataset = new Alignment(new SequenceI[] {}); } - Map featureColours = new HashMap(); + Map featureColours = new HashMap<>(); boolean parseResult = parse(dataset, featureColours, false, true); if (!parseResult) { @@ -712,76 +867,84 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * the sequences whose features are to be output * @param visible * a map whose keys are the type names of visible features + * @param visibleFeatureGroups * @param includeNonPositionalFeatures * @return */ public String printGffFormat(SequenceI[] sequences, Map visible, + List visibleFeatureGroups, boolean includeNonPositionalFeatures) { StringBuilder out = new StringBuilder(256); - int version = gffVersion == 0 ? 2 : gffVersion; - out.append(String.format("%s %d\n", GFF_VERSION, version)); - String source; - boolean isnonpos; + + out.append(String.format("%s %d\n", GFF_VERSION, gffVersion == 0 ? 2 : gffVersion)); + + if (!includeNonPositionalFeatures + && (visible == null || visible.isEmpty())) + { + return out.toString(); + } + + String[] types = visible == null ? new String[0] : visible.keySet() + .toArray( + new String[visible.keySet().size()]); + for (SequenceI seq : sequences) { - SequenceFeature[] features = seq.getSequenceFeatures(); - if (features != null) + List features = new ArrayList<>(); + if (includeNonPositionalFeatures) { - for (SequenceFeature sf : features) - { - isnonpos = sf.begin == 0 && sf.end == 0; - if (!includeNonPositionalFeatures && isnonpos) - { - /* - * ignore non-positional features if not wanted - */ - continue; - } - if (!isnonpos && !visible.containsKey(sf.type)) - { - /* - * ignore not visible features if not wanted - */ - continue; - } + features.addAll(seq.getFeatures().getNonPositionalFeatures()); + } + if (visible != null && !visible.isEmpty()) + { + features.addAll(seq.getFeatures().getPositionalFeatures(types)); + } - source = sf.featureGroup; - if (source == null) - { - source = sf.getDescription(); - } + for (SequenceFeature sf : features) + { + String source = sf.featureGroup; + if (!sf.isNonPositional() && source != null + && !visibleFeatureGroups.contains(source)) + { + // group is not visible + continue; + } - out.append(seq.getName()); - out.append(TAB); - out.append(source); - out.append(TAB); - out.append(sf.type); - out.append(TAB); - out.append(sf.begin); - out.append(TAB); - out.append(sf.end); - out.append(TAB); - out.append(sf.score); - out.append(TAB); - - int strand = sf.getStrand(); - out.append(strand == 1 ? "+" : (strand == -1 ? "-" : ".")); - out.append(TAB); - - String phase = sf.getPhase(); - out.append(phase == null ? "." : phase); - - // miscellaneous key-values (GFF column 9) - String attributes = sf.getAttributes(); - if (attributes != null) - { - out.append(TAB).append(attributes); - } + if (source == null) + { + source = sf.getDescription(); + } - out.append(newline); + out.append(seq.getName()); + out.append(TAB); + out.append(source); + out.append(TAB); + out.append(sf.type); + out.append(TAB); + out.append(sf.begin); + out.append(TAB); + out.append(sf.end); + out.append(TAB); + out.append(sf.score); + out.append(TAB); + + int strand = sf.getStrand(); + out.append(strand == 1 ? "+" : (strand == -1 ? "-" : ".")); + out.append(TAB); + + String phase = sf.getPhase(); + out.append(phase == null ? "." : phase); + + // miscellaneous key-values (GFF column 9) + String attributes = sf.getAttributes(); + if (attributes != null) + { + out.append(TAB).append(attributes); } + + out.append(newline); } } @@ -843,8 +1006,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI fromCount = Integer.parseInt(tokens[2]); } catch (NumberFormatException nfe) { - throw new IOException("Invalid number in Align field: " - + nfe.getMessage()); + throw new IOException( + "Invalid number in Align field: " + nfe.getMessage()); } /* @@ -1042,10 +1205,11 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI // rename sequences if GFF handler requested this // TODO a more elegant way e.g. gffHelper.postProcess(newseqs) ? - SequenceFeature[] sfs = seq.getSequenceFeatures(); - if (sfs != null) + List sfs = seq.getFeatures().getPositionalFeatures(); + if (!sfs.isEmpty()) { - String newName = (String) sfs[0].getValue(GffHelperI.RENAME_TOKEN); + String newName = (String) sfs.get(0).getValue( + GffHelperI.RENAME_TOKEN); if (newName != null) { seq.setName(newName); @@ -1064,9 +1228,8 @@ public class FeaturesFile extends AlignFile implements FeaturesSourceI * @param newseqs * @throws IOException */ - protected void processGffPragma(String line, - Map gffProps, AlignmentI align, - List newseqs) throws IOException + protected void processGffPragma(String line, Map gffProps, + AlignmentI align, List newseqs) throws IOException { line = line.trim(); if ("###".equals(line))