2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureAttributes;
25 import jalview.datamodel.features.FeatureLocationI;
26 import jalview.datamodel.features.FeatureSourceI;
27 import jalview.datamodel.features.FeatureSources;
28 import jalview.util.StringUtils;
30 import java.util.HashMap;
32 import java.util.Map.Entry;
33 import java.util.TreeMap;
34 import java.util.Vector;
37 * A class that models a single contiguous feature on a sequence. If flag
38 * 'contactFeature' is true, the start and end positions are interpreted instead
39 * as two contact points.
41 public class SequenceFeature implements FeatureLocationI
44 * score value if none is set; preferably Float.NaN, but see
45 * JAL-2060 and JAL-2554 for a couple of blockers to that
47 private static final float NO_SCORE = 0f;
49 private static final String STATUS = "status";
51 private static final String STRAND = "STRAND";
53 // private key for Phase designed not to conflict with real GFF data
54 private static final String PHASE = "!Phase";
56 // private key for ENA location designed not to conflict with real GFF data
57 private static final String LOCATION = "!Location";
59 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
62 * map of otherDetails special keys, and their value fields' delimiter
64 private static final Map<String, String> INFO_KEYS = new HashMap<>();
68 INFO_KEYS.put("CSQ", ",");
69 // todo capture second level metadata (CSQ FORMAT)
70 // and delimiter "|" so as to report in a table within a table?
74 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
75 * name1=value1;name2=value2,value3;...etc
77 private static final String ATTRIBUTES = "ATTRIBUTES";
80 * type, begin, end, featureGroup, score and contactFeature are final
81 * to ensure that the integrity of SequenceFeatures data store
82 * can't be broken by direct update of these fields
84 public final String type;
86 public final int begin;
90 public final String featureGroup;
92 public final float score;
94 private final boolean contactFeature;
96 public String description;
99 * a map of key-value pairs; may be populated from GFF 'column 9' data,
100 * other data sources (e.g. GenBank file), or programmatically
102 public Map<String, Object> otherDetails;
104 public Vector<String> links;
107 * the identifier (if known) for the FeatureSource held in FeatureSources,
108 * as a provider of metadata about feature attributes
110 private String source;
113 * Constructs a duplicate feature. Note: makes a shallow copy of the
114 * otherDetails map, so the new and original SequenceFeature may reference the
115 * same objects in the map.
// Copy constructor: forwards the source feature itself plus its begin, end
// and feature group (each read through the source's getters) to another
// constructor of this class.
// NOTE(review): the argument list continues on a line that is not shown in
// this listing; confirm the final argument against the full file.
119 public SequenceFeature(SequenceFeature cpy)
121 this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy
// Constructor for a feature with no score of its own: delegates to the
// six-argument constructor, passing NO_SCORE in the score position.
134 public SequenceFeature(String theType, String theDesc, int theBegin,
135 int theEnd, String group)
137 this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
141 * Constructor including a score value
// Constructor including a score: stores the description, begin, feature
// group and score, then derives the contactFeature flag from the type.
// NOTE(review): the assignments on original lines 153 and 156 are not shown
// in this listing (presumably the 'type' and 'end' fields) - confirm.
150 public SequenceFeature(String theType, String theDesc, int theBegin,
151 int theEnd, float theScore, String group)
154 this.description = theDesc;
155 this.begin = theBegin;
157 this.featureGroup = group;
158 this.score = theScore;
161 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
// the flag is fixed at construction time; both spellings are accepted and
// the comparison ignores case
163 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
164 || "disulphide bond".equalsIgnoreCase(type);
/**
 * A copy constructor that allows the values of the final fields (type,
 * begin, end, group and score) to be 'modified' in the copy. The description
 * and source identifier are taken from the original feature. The
 * otherDetails map and the links are copied into new collections; the copy
 * is shallow, so both features refer to the same value objects.
 *
 * @param sf
 *          the feature to copy
 * @param newType
 *          type for the new feature
 * @param newBegin
 *          begin position for the new feature
 * @param newEnd
 *          end position for the new feature
 * @param newGroup
 *          feature group for the new feature
 * @param newScore
 *          score for the new feature
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  this(newType, sf.getDescription(), newBegin, newEnd, newScore, newGroup);

  this.source = sf.source;

  if (sf.otherDetails != null)
  {
    // new map, same key and value objects
    this.otherDetails = new HashMap<>(sf.otherDetails);
  }

  // links stays null when the original has none (or an empty list)
  if (sf.links != null && !sf.links.isEmpty())
  {
    this.links = new Vector<>(sf.links);
  }
}
204 * A copy constructor that allows the value of final fields to be 'modified'
// Copies sf with a new begin, end, group and score while keeping sf's own
// type (sf.getType()); delegates to the constructor that also takes a type.
212 public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
213 String newGroup, float newScore)
215 this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
219 * Two features are considered equal if they have the same type, group,
220 * description, start, end, phase, strand, and (if present) 'Name', 'ID' and
221 * 'Parent' attributes.
223 * Note we need to check Parent to distinguish the same exon occurring in
224 * different transcripts (in Ensembl GFF). This allows assembly of transcript
225 * sequences from their component exon regions.
// Delegates to the two-argument overload with ignoreParent = false, so the
// 'Parent' attribute takes part in the comparison.
228 public boolean equals(Object o)
230 return equals(o, false);
234 * Overloaded method allows the equality test to optionally ignore the
235 * 'Parent' attribute of a feature. This supports avoiding adding many
236 * superficially duplicate 'exon' or CDS features to genomic or protein
240 * @param ignoreParent
243 public boolean equals(Object o, boolean ignoreParent)
245 if (o == null || !(o instanceof SequenceFeature))
250 SequenceFeature sf = (SequenceFeature) o;
251 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
253 if (begin != sf.begin || end != sf.end || !sameScore)
258 if (getStrand() != sf.getStrand())
263 if (!(type + description + featureGroup + getPhase()).equals(
264 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
268 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
272 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
278 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
287 * Returns true if both values are null, are both non-null and equal
293 protected static boolean equalAttribute(Object att1, Object att2)
295 if (att1 == null && att2 == null)
301 return att1.equals(att2);
303 return att2.equals(att1);
309 * @return DOCUMENT ME!
312 public int getBegin()
320 * @return DOCUMENT ME!
331 * @return DOCUMENT ME!
333 public String getType()
341 * @return DOCUMENT ME!
343 public String getDescription()
348 public void setDescription(String desc)
353 public String getFeatureGroup()
// Adds a link to this feature's links, ignoring one that is already present.
358 public void addLink(String labelLink)
// the links vector is created here rather than at construction
// NOTE(review): the condition guarding this allocation is on a line not
// shown in this listing (presumably a null check on links) - confirm
362 links = new Vector<String>();
// a link not yet held is inserted at index 0, i.e. ahead of existing links
365 if (!links.contains(labelLink))
367 links.insertElementAt(labelLink, 0);
371 public float getScore()
377 * Used for getting values which are not in the basic set. eg STRAND, PHASE
// Looks up the value stored under key in the otherDetails map.
383 public Object getValue(String key)
// otherDetails may still be null, so it is checked before the lookup
// NOTE(review): the result of this branch is on a line not shown in this
// listing (presumably null) - confirm
385 if (otherDetails == null)
391 return otherDetails.get(key);
396 * Answers the value of the specified attribute as string, or null if no such
// Returns the toString() form of the value stored under key, or null when
// no value is stored under that key.
402 public String getValueAsString(String key)
// NOTE(review): the result of this branch is on a line not shown in this
// listing (presumably null) - confirm
404 if (otherDetails == null)
408 Object value = otherDetails.get(key);
// a missing value yields null rather than the string "null"
409 return value == null ? null : value.toString();
413 * Returns a property value for the given key if known, else the specified
417 * @param defaultValue
420 public Object getValue(String key, Object defaultValue)
422 Object value = getValue(key);
423 return value == null ? defaultValue : value;
427 * Used for setting values which are not in the basic set. eg STRAND, FRAME
// Stores value under key in otherDetails, then reports the attribute through
// recordAttribute.
// NOTE(review): original lines 436-438 are not shown in this listing, so a
// guard may precede this code - confirm against the full file.
435 public void setValue(String key, Object value)
// the map is created on first use
439 if (otherDetails == null)
441 otherDetails = new HashMap<String, Object>();
444 otherDetails.put(key, value);
445 recordAttribute(key, value);
450 * Notifies the addition of a feature attribute. This lets us keep track of
451 * which attributes are present on each feature type, and also the range of
452 * numerical-valued attributes.
// Registers an attribute of this feature with the FeatureAttributes
// singleton, keyed by this feature's type and the attribute key.
457 protected void recordAttribute(String key, Object value)
459 String attDesc = null;
// the attribute's name is looked up from the FeatureSource registered under
// this feature's 'source' identifier
// NOTE(review): the result of getSource(source) is dereferenced directly,
// and the guard around this statement is not shown in this listing - confirm
// it covers a null source and an unregistered source id
462 attDesc = FeatureSources.getInstance().getSource(source)
463 .getAttributeName(key);
// NOTE(review): the final argument of this call is on a line not shown
466 FeatureAttributes.getInstance().addAttribute(this.type, key, attDesc,
471 * The following methods are added to maintain the castor Uniprot mapping file
// Stores the status as an ordinary attribute under the STATUS key
// ("status"), via setValue.
474 public void setStatus(String status)
476 setValue(STATUS, status);
// Reads the attribute stored under the STATUS key. The value is cast to
// String, so a non-String value stored under that key through setValue would
// raise a ClassCastException here.
479 public String getStatus()
481 return (String) getValue(STATUS);
// Stores the given string as an ordinary attribute under the reserved
// ATTRIBUTES key, via setValue.
484 public void setAttributes(String attr)
486 setValue(ATTRIBUTES, attr);
// Reads the attribute stored under the reserved ATTRIBUTES key, cast to
// String.
489 public String getAttributes()
491 return (String) getValue(ATTRIBUTES);
495 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
496 * GFF), and 0 for unknown or not (validly) specified
// Derives the strand from the value stored under the STRAND key.
// NOTE(review): the lines that set and return the result (including the
// branch before the 'else if' below) are not shown in this listing.
500 public int getStrand()
// nothing can be stored while otherDetails is null
503 if (otherDetails != null)
// read straight from the map rather than through getValue
505 Object str = otherDetails.get(STRAND);
// literal-first equals is safe for a null or non-String stored value
510 else if ("+".equals(str))
519 * Set the value of strand
522 * should be "+" for forward, or "-" for reverse
// Stores the strand string as an attribute under the STRAND key. The value
// is stored as given; no validation is done here.
524 public void setStrand(String strand)
526 setValue(STRAND, strand);
// Stores the phase as an attribute under the private PHASE key ("!Phase").
529 public void setPhase(String phase)
531 setValue(PHASE, phase);
// Reads the attribute stored under the private PHASE key, cast to String.
534 public String getPhase()
536 return (String) getValue(PHASE);
540 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
// Stores the location specifier as an attribute under the private LOCATION
// key ("!Location").
544 public void setEnaLocation(String loc)
546 setValue(LOCATION, loc);
550 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
// Reads the attribute stored under the private LOCATION key, cast to String.
554 public String getEnaLocation()
556 return (String) getValue(LOCATION);
560 * Readable representation, for debug only, not guaranteed not to change
// Formats the feature as "<begin> <end> <type> <string>" for debugging.
// NOTE(review): the fourth format argument is on a line not shown in this
// listing.
564 public String toString()
566 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
571 * Overridden to ensure that whenever two objects are equal, they have the
// Hash code combining the hash of a string concatenated from type,
// description, feature group and the 'ID', 'Name' and 'Parent' attributes
// with begin, end and the score truncated to int.
// NOTE(review): both statements continue on lines not shown in this listing,
// so further terms may be included.
// 'Parent' is part of the hash, so two features that match only under
// equals(o, true) (ignoreParent) can have different hash codes.
575 public int hashCode()
577 String s = getType() + getDescription() + getFeatureGroup()
578 + getValue("ID") + getValue("Name") + getValue("Parent")
580 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
585 * Answers true if the feature's start/end values represent two related
586 * positions, rather than ends of a range. Such features may be visualised or
587 * reported differently to features on a range.
// Returns the contactFeature flag, a final field fixed in the constructor.
590 public boolean isContactFeature()
592 return contactFeature;
596 * Answers true if the feature has zero start and end position
// True only when begin and end are both zero.
600 public boolean isNonPositional()
602 return begin == 0 && end == 0;
606 * Answers an html-formatted report of feature details
// Builds an HTML <table> describing this feature: fixed rows for type,
// start/end, description and (conditionally) score and group, followed by
// one or more rows per entry in otherDetails. Every row uses the
// three-column ROW_DATA template.
// NOTE(review): several lines of this method (among them the end of the
// first statement, an 'else', the tail of the last append and the return)
// are not shown in this listing.
610 public String getDetailsReport()
// metadata can describe attribute names and types; it is null-checked below
// before use
612 FeatureSourceI metadata = FeatureSources.getInstance()
615 StringBuilder sb = new StringBuilder(128);
617 sb.append("<table>");
// NOTE(review): type, group, attribute keys and attribute values are written
// into the HTML as they are; only the description is passed through
// stripHtmlTags - confirm that is intended
618 sb.append(String.format(ROW_DATA, "Type", type, ""));
// a single position is shown when begin equals end; otherwise the two
// positions are joined by ":" for a contact feature and "-" for a range
619 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
620 : begin + (isContactFeature() ? ":" : "-") + end, ""));
621 String desc = StringUtils.stripHtmlTags(description);
622 sb.append(String.format(ROW_DATA, "Description", desc, ""));
// a score of NaN or zero is not reported (zero is also the NO_SCORE value)
623 if (!Float.isNaN(score) && score != 0f)
625 sb.append(String.format(ROW_DATA, "Score", score, ""));
627 if (featureGroup != null)
629 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
632 if (otherDetails != null)
// attributes are reported in case-insensitive key order
// NOTE(review): with this comparator, keys that differ only in letter case
// are treated as the same key, so only one of them survives putAll
634 TreeMap<String, Object> ordered = new TreeMap<>(
635 String.CASE_INSENSITIVE_ORDER);
636 ordered.putAll(otherDetails);
638 for (Entry<String, Object> entry : ordered.entrySet())
640 String key = entry.getKey();
641 if (ATTRIBUTES.equals(key))
643 continue; // to avoid double reporting
// keys listed in INFO_KEYS get one row per delimited value, with the value
// in the third column
645 if (INFO_KEYS.containsKey(key))
648 * split selected INFO data by delimiter over multiple lines
650 String delimiter = INFO_KEYS.get(key);
// NOTE(review): String.split treats the delimiter as a regular expression;
// "," is safe, but a delimiter such as "|" would need quoting.
// entry.getValue() is dereferenced without a null check here and below
651 String[] values = entry.getValue().toString().split(delimiter);
652 for (String value : values)
654 sb.append(String.format(ROW_DATA, key, "", value));
658 { // tried <td title="key"> but it failed to provide a tooltip :-(
// all other keys get at most one row; attDesc is the attribute name
// supplied by the metadata, when there is any
659 String attDesc = null;
660 if (metadata != null)
662 attDesc = metadata.getAttributeName(key);
664 String value = entry.getValue().toString();
// values judged uninteresting by isValueInteresting are left out
665 if (isValueInteresting(key, value, metadata))
667 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
673 sb.append("</table>");
675 String text = sb.toString();
680 * Answers true if we judge the value is worth displaying, by some heuristic
// Heuristic used by getDetailsReport to decide whether an attribute value is
// worth a row in the report.
// NOTE(review): the results of the individual checks, and the test applied
// to the parsed number, are on lines not shown in this listing.
688 boolean isValueInteresting(String key, String value,
689 FeatureSourceI metadata)
692 * currently suppressing zero values as well as null or empty
// textual screen: null, empty, "." and the literal "0"
694 if (value == null || "".equals(value) || ".".equals(value)
695 || "0".equals(value))
// without metadata the attribute's type cannot be looked up
700 if (metadata == null)
705 FeatureAttributeType attType = metadata.getAttributeType(key);
// only attributes typed as Float or Integer go on to the numeric check
707 && (attType == FeatureAttributeType.Float || attType
708 .equals(FeatureAttributeType.Integer)))
// the value is parsed as a float; a value that does not parse is handled by
// the catch below
712 float fval = Float.valueOf(value);
717 } catch (NumberFormatException e)
723 return true; // default to interesting
727 * Sets the feature source identifier
731 public void setSource(String theSource)