X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=src%2Fjalview%2Fdatamodel%2FSequenceFeature.java;h=bbf1b4515595fdc5837e1384a47cb046259237e1;hb=08c7bee16c16563cc7cec7ea4d336b3e0c4c937a;hp=e81292bc5340958255277c15d078c311c2724b95;hpb=2026a420963f094072607c0495b6369ba96e60c0;p=jalview.git diff --git a/src/jalview/datamodel/SequenceFeature.java b/src/jalview/datamodel/SequenceFeature.java index e81292b..bbf1b45 100755 --- a/src/jalview/datamodel/SequenceFeature.java +++ b/src/jalview/datamodel/SequenceFeature.java @@ -1,169 +1,308 @@ /* - * Jalview - A Sequence Alignment Editor and Viewer (Version 2.7) - * Copyright (C) 2011 J Procter, AM Waterhouse, G Barton, M Clamp, S Searle + * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$) + * Copyright (C) $$Year-Rel$$ The Jalview Authors * * This file is part of Jalview. * * Jalview is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. - * + * as published by the Free Software Foundation, either version 3 + * of the License, or (at your option) any later version. + * * Jalview is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty * of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU General Public License for more details. * - * You should have received a copy of the GNU General Public License along with Jalview. If not, see . + * You should have received a copy of the GNU General Public License + * along with Jalview. If not, see . + * The Jalview Authors are detailed in the 'AUTHORS' file. */ package jalview.datamodel; -import java.util.*; +import java.util.Comparator; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Map.Entry; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.Vector; + +import jalview.datamodel.features.FeatureAttributeType; +import jalview.datamodel.features.FeatureAttributes; +import jalview.datamodel.features.FeatureLocationI; +import jalview.datamodel.features.FeatureSourceI; +import jalview.datamodel.features.FeatureSources; +import jalview.util.StringUtils; /** - * DOCUMENT ME! - * - * @author $author$ - * @version $Revision$ + * A class that models a single contiguous feature on a sequence. If flag + * 'contactFeature' is true, the start and end positions are interpreted instead + * as two contact points. */ -public class SequenceFeature +public class SequenceFeature implements FeatureLocationI { - public int begin; + /* + * score value if none is set; preferably Float.Nan, but see + * JAL-2060 and JAL-2554 for a couple of blockers to that + */ + private static final float NO_SCORE = 0f; + + private static final String STATUS = "status"; - public int end; + public static final String STRAND = "STRAND"; - public float score; + // key for Phase designed not to conflict with real GFF data + public static final String PHASE = "!Phase"; - public String type; + // private key for ENA location designed not to conflict with real GFF data + private static final String LOCATION = "!Location"; + + private static final String ROW_DATA = "%s%s%s"; + + /* + * type, begin, end, featureGroup, score and contactFeature are final + * to ensure that the integrity of SequenceFeatures data store + * can't be broken by direct update of these fields + */ + public final String type; + + public final int begin; + + public final int end; + + public final String featureGroup; + + public final float score; + + private final boolean contactFeature; public String description; - public Hashtable otherDetails; + /* + * a map of key-value pairs; may be populated from GFF 'column 9' data, + * other data sources (e.g. GenBank file), or programmatically + */ + public Map otherDetails; - public java.util.Vector links; - - // Feature group can be set from a features file - // as a group of features between STARTGROUP and ENDGROUP markers - public String featureGroup; + public Vector links; - public SequenceFeature() - { - } + /* + * the identifier (if known) for the FeatureSource held in FeatureSources, + * as a provider of metadata about feature attributes + */ + private String source; /** - * Constructs a duplicate feature. Note: Uses clone on the otherDetails so - * only shallow copies are made of additional properties and method will - * silently fail if unclonable objects are found in the hash. + * Constructs a duplicate feature. Note: Uses makes a shallow copy of the + * otherDetails map, so the new and original SequenceFeature may reference the + * same objects in the map. * * @param cpy */ public SequenceFeature(SequenceFeature cpy) { - if (cpy != null) + this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy + .getScore()); + } + + /** + * Constructor + * + * @param theType + * @param theDesc + * @param theBegin + * @param theEnd + * @param group + */ + public SequenceFeature(String theType, String theDesc, int theBegin, + int theEnd, String group) + { + this(theType, theDesc, theBegin, theEnd, NO_SCORE, group); + } + + /** + * Constructor including a score value + * + * @param theType + * @param theDesc + * @param theBegin + * @param theEnd + * @param theScore + * @param group + */ + public SequenceFeature(String theType, String theDesc, int theBegin, + int theEnd, float theScore, String group) + { + this.type = theType; + this.description = theDesc; + this.begin = theBegin; + this.end = theEnd; + this.featureGroup = group; + this.score = theScore; + + /* + * for now, only "Disulfide/disulphide bond" is treated as a contact feature + */ + this.contactFeature = "disulfide bond".equalsIgnoreCase(type) + || "disulphide bond".equalsIgnoreCase(type); + } + + /** + * A copy constructor that allows the value of final fields to be 'modified' + * + * @param sf + * @param newType + * @param newBegin + * @param newEnd + * @param newGroup + * @param newScore + */ + public SequenceFeature(SequenceFeature sf, String newType, int newBegin, + int newEnd, String newGroup, float newScore) + { + this(newType, sf.getDescription(), newBegin, newEnd, newScore, + newGroup); + + this.source = sf.source; + + if (sf.otherDetails != null) { - begin = cpy.begin; - end = cpy.end; - score = cpy.score; - if (cpy.type != null) - { - type = new String(cpy.type); - } - if (cpy.description != null) - { - description = new String(cpy.description); - } - if (cpy.featureGroup != null) - { - featureGroup = new String(cpy.featureGroup); - } - if (cpy.otherDetails != null) - { - try - { - otherDetails = (Hashtable) cpy.otherDetails.clone(); - } catch (Exception e) - { - // Uncloneable objects in the otherDetails - don't complain - } - } - if (cpy.links != null && cpy.links.size() > 0) - { - links = new Vector(); - for (int i = 0, iSize = cpy.links.size(); i < iSize; i++) - { - links.addElement(cpy.links.elementAt(i)); - } - } + otherDetails = new LinkedHashMap<>(); + otherDetails.putAll(sf.otherDetails); + } + if (sf.links != null && sf.links.size() > 0) + { + links = new Vector<>(); + links.addAll(sf.links); } } - - public SequenceFeature(String type, String desc, String status, - int begin, int end, String featureGroup) + + /** + * A copy constructor that allows the value of final fields to be 'modified' + * + * @param sf + * @param newBegin + * @param newEnd + * @param newGroup + * @param newScore + */ + public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd, + String newGroup, float newScore) { - this.type = type; - this.description = desc; - setValue("status", status); - this.begin = begin; - this.end = end; - this.featureGroup = featureGroup; + this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore); } - public SequenceFeature(String type, String desc, int begin, int end, - float score, String featureGroup) + /** + * Two features are considered equal if they have the same type, group, + * description, start, end, phase, strand, and (if present) 'Name', ID' and + * 'Parent' attributes. + * + * Note we need to check Parent to distinguish the same exon occurring in + * different transcripts (in Ensembl GFF). This allows assembly of transcript + * sequences from their component exon regions. + */ + @Override + public boolean equals(Object o) { - this.type = type; - this.description = desc; - this.begin = begin; - this.end = end; - this.score = score; - this.featureGroup = featureGroup; + return equals(o, false); } - public boolean equals(SequenceFeature sf) + /** + * Overloaded method allows the equality test to optionally ignore the + * 'Parent' attribute of a feature. This supports avoiding adding many + * superficially duplicate 'exon' or CDS features to genomic or protein + * sequence. + * + * @param o + * @param ignoreParent + * @return + */ + public boolean equals(Object o, boolean ignoreParent) { - if (begin != sf.begin || end != sf.end || score != sf.score) + if (o == null || !(o instanceof SequenceFeature)) + { + return false; + } + + SequenceFeature sf = (SequenceFeature) o; + boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score) + : score == sf.score; + if (begin != sf.begin || end != sf.end || !sameScore) { return false; } - if (!(type + description + featureGroup).equals(sf.type - + sf.description + sf.featureGroup)) + if (getStrand() != sf.getStrand()) { return false; } + if (!(type + description + featureGroup + getPhase()).equals( + sf.type + sf.description + sf.featureGroup + sf.getPhase())) + { + return false; + } + if (!equalAttribute(getValue("ID"), sf.getValue("ID"))) + { + return false; + } + if (!equalAttribute(getValue("Name"), sf.getValue("Name"))) + { + return false; + } + if (!ignoreParent) + { + if (!equalAttribute(getValue("Parent"), sf.getValue("Parent"))) + { + return false; + } + } return true; } /** + * Returns true if both values are null, are both non-null and equal + * + * @param att1 + * @param att2 + * @return + */ + protected static boolean equalAttribute(Object att1, Object att2) + { + if (att1 == null && att2 == null) + { + return true; + } + if (att1 != null) + { + return att1.equals(att2); + } + return att2.equals(att1); + } + + /** * DOCUMENT ME! * * @return DOCUMENT ME! */ + @Override public int getBegin() { return begin; } - public void setBegin(int start) - { - this.begin = start; - } - /** * DOCUMENT ME! * * @return DOCUMENT ME! */ + @Override public int getEnd() { return end; } - public void setEnd(int end) - { - this.end = end; - } - /** * DOCUMENT ME! * @@ -174,11 +313,6 @@ public class SequenceFeature return type; } - public void setType(String type) - { - this.type = type; - } - /** * DOCUMENT ME! * @@ -199,19 +333,22 @@ public class SequenceFeature return featureGroup; } - public void setFeatureGroup(String featureGroup) - { - this.featureGroup = featureGroup; - } - + /** + * Adds a hyperlink for the feature. This should have the format label|url. + * + * @param labelLink + */ public void addLink(String labelLink) { if (links == null) { - links = new java.util.Vector(); + links = new Vector<>(); } - links.insertElementAt(labelLink, 0); + if (!links.contains(labelLink)) + { + links.insertElementAt(labelLink, 0); + } } public float getScore() @@ -219,13 +356,8 @@ public class SequenceFeature return score; } - public void setScore(float value) - { - score = value; - } - /** - * Used for getting values which are not in the basic set. eg STRAND, FRAME + * Used for getting values which are not in the basic set. eg STRAND, PHASE * for GFF file * * @param key @@ -244,6 +376,44 @@ public class SequenceFeature } /** + * Answers the value of the specified attribute as string, or null if no such + * value. If more than one attribute name is provided, tries to resolve as keys + * to nested maps. For example, if attribute "CSQ" holds a map of key-value + * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele" + * in that map. + * + * @param key + * @return + */ + public String getValueAsString(String... key) + { + if (otherDetails == null) + { + return null; + } + Object value = otherDetails.get(key[0]); + if (key.length > 1 && value instanceof Map) + { + value = ((Map) value).get(key[1]); + } + return value == null ? null : value.toString(); + } + + /** + * Returns a property value for the given key if known, else the specified + * default value + * + * @param key + * @param defaultValue + * @return + */ + public Object getValue(String key, Object defaultValue) + { + Object value = getValue(key); + return value == null ? defaultValue : value; + } + + /** * Used for setting values which are not in the basic set. eg STRAND, FRAME * for GFF file * @@ -258,42 +428,341 @@ public class SequenceFeature { if (otherDetails == null) { - otherDetails = new Hashtable(); + /* + * LinkedHashMap preserves insertion order of attributes + */ + otherDetails = new LinkedHashMap<>(); } otherDetails.put(key, value); + recordAttribute(key, value); } } + /** + * Notifies the addition of a feature attribute. This lets us keep track of + * which attributes are present on each feature type, and also the range of + * numerical-valued attributes. + * + * @param key + * @param value + */ + protected void recordAttribute(String key, Object value) + { + String attDesc = null; + if (source != null) + { + attDesc = FeatureSources.getInstance().getSource(source) + .getAttributeName(key); + } + + FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value, + key); + } + /* * The following methods are added to maintain the castor Uniprot mapping file * for the moment. */ public void setStatus(String status) { - setValue("status", status); + setValue(STATUS, status); } public String getStatus() { + return (String) getValue(STATUS); + } + + /** + * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in + * GFF), and 0 for unknown or not (validly) specified + * + * @return + */ + public int getStrand() + { + int strand = 0; if (otherDetails != null) { - String stat = (String) otherDetails.get("status"); - if (stat != null) - return new String(stat); + Object str = otherDetails.get(STRAND); + if ("-".equals(str)) + { + strand = -1; + } + else if ("+".equals(str)) + { + strand = 1; + } } - return null; + return strand; + } + + /** + * Set the value of strand + * + * @param strand + * should be "+" for forward, or "-" for reverse + */ + public void setStrand(String strand) + { + setValue(STRAND, strand); } - public void setPosition(int pos) + public void setPhase(String phase) { - begin = pos; - end = pos; + setValue(PHASE, phase); } - public int getPosition() + public String getPhase() { - return begin; + return (String) getValue(PHASE); + } + + /** + * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121) + * + * @param loc + */ + public void setEnaLocation(String loc) + { + setValue(LOCATION, loc); + } + + /** + * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121) + * + * @param loc + */ + public String getEnaLocation() + { + return (String) getValue(LOCATION); + } + + /** + * Readable representation, for debug only, not guaranteed not to change + * between versions + */ + @Override + public String toString() + { + return String.format("%d %d %s %s", getBegin(), getEnd(), getType(), + getDescription()); + } + + /** + * Overridden to ensure that whenever two objects are equal, they have the + * same hashCode + */ + @Override + public int hashCode() + { + String s = getType() + getDescription() + getFeatureGroup() + + getValue("ID") + getValue("Name") + getValue("Parent") + + getPhase(); + return s.hashCode() + getBegin() + getEnd() + (int) getScore() + + getStrand(); + } + + /** + * Answers true if the feature's start/end values represent two related + * positions, rather than ends of a range. Such features may be visualised or + * reported differently to features on a range. + */ + @Override + public boolean isContactFeature() + { + return contactFeature; + } + + /** + * Answers true if the sequence has zero start and end position + * + * @return + */ + public boolean isNonPositional() + { + return begin == 0 && end == 0; + } + + /** + * Answers an html-formatted report of feature details. If parameter + * {@code mf} is not null, the feature is a virtual linked feature, and + * details included both the original location and the mapped location + * (CDS/peptide). + * + * @param seqName + * @param mf + * + * @return + */ + public String getDetailsReport(String seqName, MappedFeatures mf) + { + FeatureSourceI metadata = FeatureSources.getInstance() + .getSource(source); + + StringBuilder sb = new StringBuilder(128); + sb.append("
"); + sb.append(""); + String name = mf == null ? seqName : mf.getLinkedSequenceName(); + sb.append(String.format(ROW_DATA, "Location", name, + begin == end ? begin + : begin + (isContactFeature() ? ":" : "-") + end)); + + String consequence = ""; + if (mf != null) + { + int[] localRange = mf.getMappedPositions(begin, end); + int from = localRange[0]; + int to = localRange[localRange.length - 1]; + String s = mf.isFromCds() ? "Peptide Location" : "Coding location"; + sb.append(String.format(ROW_DATA, s, seqName, from == to ? from + : from + (isContactFeature() ? ":" : "-") + to)); + if (mf.isFromCds()) + { + consequence = mf.findProteinVariants(this); + } + } + sb.append(String.format(ROW_DATA, "Type", type, "")); + String desc = StringUtils.stripHtmlTags(description); + sb.append(String.format(ROW_DATA, "Description", desc, "")); + if (!Float.isNaN(score) && score != 0f) + { + sb.append(String.format(ROW_DATA, "Score", score, "")); + } + if (featureGroup != null) + { + sb.append(String.format(ROW_DATA, "Group", featureGroup, "")); + } + + if (!consequence.isEmpty()) + { + sb.append(String.format(ROW_DATA, "Consequence", + "Translated by Jalview", consequence)); + } + + if (otherDetails != null) + { + TreeMap ordered = new TreeMap<>( + String.CASE_INSENSITIVE_ORDER); + ordered.putAll(otherDetails); + + for (Entry entry : ordered.entrySet()) + { + String key = entry.getKey(); + + Object value = entry.getValue(); + if (value instanceof Map) + { + /* + * expand values in a Map attribute across separate lines + * copy to a TreeMap for alphabetical ordering + */ + Map values = (Map) value; + SortedMap sm = new TreeMap<>( + String.CASE_INSENSITIVE_ORDER); + sm.putAll(values); + for (Entry e : sm.entrySet()) + { + sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e + .getValue().toString())); + } + } + else + { + // tried
but it failed to provide a tooltip :-( + String attDesc = null; + if (metadata != null) + { + attDesc = metadata.getAttributeName(key); + } + String s = entry.getValue().toString(); + if (isValueInteresting(key, s, metadata)) + { + sb.append(String.format(ROW_DATA, key, attDesc == null ? "" + : attDesc, s)); + } + } + } + } + sb.append("
"); + + String text = sb.toString(); + return text; + } + + /** + * Answers true if we judge the value is worth displaying, by some heuristic + * rules, else false + * + * @param key + * @param value + * @param metadata + * @return + */ + boolean isValueInteresting(String key, String value, + FeatureSourceI metadata) + { + /* + * currently suppressing zero values as well as null or empty + */ + if (value == null || "".equals(value) || ".".equals(value) + || "0".equals(value)) + { + return false; + } + + if (metadata == null) + { + return true; + } + + FeatureAttributeType attType = metadata.getAttributeType(key); + if (attType != null + && (attType == FeatureAttributeType.Float || attType + .equals(FeatureAttributeType.Integer))) + { + try + { + float fval = Float.valueOf(value); + if (fval == 0f) + { + return false; + } + } catch (NumberFormatException e) + { + // ignore + } + } + + return true; // default to interesting + } + + /** + * Sets the feature source identifier + * + * @param theSource + */ + public void setSource(String theSource) + { + source = theSource; + } +} + +class SFSortByEnd implements Comparator +{ + @Override + public int compare(SequenceFeature a, SequenceFeature b) + { + return a.getEnd() - b.getEnd(); } +} +class SFSortByBegin implements Comparator +{ + @Override + public int compare(SequenceFeature a, SequenceFeature b) + { + return a.getBegin() - b.getBegin(); + } }