2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureAttributes;
25 import jalview.datamodel.features.FeatureLocationI;
26 import jalview.datamodel.features.FeatureSourceI;
27 import jalview.datamodel.features.FeatureSources;
28 import jalview.util.StringUtils;
30 import java.util.Comparator;
31 import java.util.HashMap;
33 import java.util.Map.Entry;
34 import java.util.SortedMap;
35 import java.util.TreeMap;
36 import java.util.Vector;
38 import intervalstore.api.IntervalI;
41 * A class that models a single contiguous feature on a sequence. If flag
42 * 'contactFeature' is true, the start and end positions are interpreted instead
43 * as two contact points.
45 public class SequenceFeature implements FeatureLocationI
48 * score value if none is set; preferably Float.NaN, but see
49 * JAL-2060 and JAL-2554 for a couple of blockers to that
51 private static final float NO_SCORE = 0f;
53 private static final String STATUS = "status";
55 private static final String STRAND = "STRAND";
57 // private key for Phase designed not to conflict with real GFF data
58 private static final String PHASE = "!Phase";
60 // private key for ENA location designed not to conflict with real GFF data
61 private static final String LOCATION = "!Location";
63 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
66 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
67 * name1=value1;name2=value2,value3;...etc
69 private static final String ATTRIBUTES = "ATTRIBUTES";
72 * type, begin, end, featureGroup, score and contactFeature are final
73 * to ensure that the integrity of SequenceFeatures data store
74 * can't be broken by direct update of these fields
76 public final String type;
78 public final int begin;
82 public final String featureGroup;
84 public final float score;
86 private final boolean contactFeature;
88 public String description;
91 * a map of key-value pairs; may be populated from GFF 'column 9' data,
92 * other data sources (e.g. GenBank file), or programmatically
94 public Map<String, Object> otherDetails;
96 public Vector<String> links;
99 * the identifier (if known) for the FeatureSource held in FeatureSources,
100 * as a provider of metadata about feature attributes
102 private String source;
105 * 1-based index into the featureList used by FeatureStoreJS
110 * containment nesting link used by FeatureStoreJS to track starting points
112 public SequenceFeature containedBy;
115 * Constructs a duplicate feature. Note: makes a shallow copy of the
116 * otherDetails map, so the new and original SequenceFeature may reference the
117 * same objects in the map.
121 public SequenceFeature(SequenceFeature cpy)
123 this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy
136 public SequenceFeature(String theType, String theDesc, int theBegin,
137 int theEnd, String group)
139 this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
143 * Constructor including a score value
152 public SequenceFeature(String theType, String theDesc, int theBegin,
153 int theEnd, float theScore, String group)
156 this.description = theDesc;
157 this.begin = theBegin;
159 this.featureGroup = group;
160 this.score = theScore;
163 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
165 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
166 || "disulphide bond".equalsIgnoreCase(type);
170 * A copy constructor that allows the value of final fields to be 'modified'
179 public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
180 int newEnd, String newGroup, float newScore)
182 this(newType, sf.getDescription(), newBegin, newEnd, newScore,
185 this.source = sf.source;
187 if (sf.otherDetails != null)
189 otherDetails = new HashMap<>();
190 for (Entry<String, Object> entry : sf.otherDetails.entrySet())
192 otherDetails.put(entry.getKey(), entry.getValue());
195 if (sf.links != null && sf.links.size() > 0)
197 links = new Vector<>();
198 for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
200 links.addElement(sf.links.elementAt(i));
206 * A copy constructor that allows the value of final fields to be 'modified'
214 public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
215 String newGroup, float newScore)
217 this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
221 * Two features are considered equal if they have the same type, group,
222 * description, start, end, phase, strand, and (if present) 'Name', 'ID' and
223 * 'Parent' attributes.
225 * Note we need to check Parent to distinguish the same exon occurring in
226 * different transcripts (in Ensembl GFF). This allows assembly of transcript
227 * sequences from their component exon regions.
230 public boolean equals(Object o)
232 return equals(o, false);
236 * Overloaded method allows the equality test to optionally ignore the
237 * 'Parent' attribute of a feature. This supports avoiding adding many
238 * superficially duplicate 'exon' or CDS features to genomic or protein
242 * @param ignoreParent
245 public boolean equals(Object o, boolean ignoreParent)
247 if (o == null || !(o instanceof SequenceFeature))
252 SequenceFeature sf = (SequenceFeature) o;
253 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
255 if (begin != sf.begin || end != sf.end || !sameScore)
260 if (getStrand() != sf.getStrand())
265 if (!(type + description + featureGroup + getPhase()).equals(
266 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
270 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
274 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
280 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
289 * Returns true if both values are null, are both non-null and equal
295 protected static boolean equalAttribute(Object att1, Object att2)
297 if (att1 == null && att2 == null)
303 return att1.equals(att2);
305 return att2.equals(att1);
311 * @return DOCUMENT ME!
314 public int getBegin()
322 * @return DOCUMENT ME!
333 * @return DOCUMENT ME!
335 public String getType()
343 * @return DOCUMENT ME!
345 public String getDescription()
350 public void setDescription(String desc)
355 public String getFeatureGroup()
360 public void addLink(String labelLink)
364 links = new Vector<>();
367 if (!links.contains(labelLink))
369 links.insertElementAt(labelLink, 0);
373 public float getScore()
379 * Used for getting values which are not in the basic set. eg STRAND, PHASE
385 public Object getValue(String key)
387 if (otherDetails == null)
393 return otherDetails.get(key);
398 * Answers the value of the specified attribute as string, or null if no such
399 * value. If more than one attribute name is provided, tries to resolve as keys
400 * to nested maps. For example, if attribute "CSQ" holds a map of key-value
401 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele"
407 public String getValueAsString(String... key)
409 if (otherDetails == null)
413 Object value = otherDetails.get(key[0]);
414 if (key.length > 1 && value instanceof Map<?, ?>)
416 value = ((Map) value).get(key[1]);
418 return value == null ? null : value.toString();
422 * Returns a property value for the given key if known, else the specified
426 * @param defaultValue
429 public Object getValue(String key, Object defaultValue)
431 Object value = getValue(key);
432 return value == null ? defaultValue : value;
436 * Used for setting values which are not in the basic set. eg STRAND, FRAME
444 public void setValue(String key, Object value)
448 if (otherDetails == null)
450 otherDetails = new HashMap<>();
453 otherDetails.put(key, value);
454 recordAttribute(key, value);
459 * Notifies the addition of a feature attribute. This lets us keep track of
460 * which attributes are present on each feature type, and also the range of
461 * numerical-valued attributes.
466 protected void recordAttribute(String key, Object value)
468 String attDesc = null;
471 attDesc = FeatureSources.getInstance().getSource(source)
472 .getAttributeName(key);
475 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
480 * The following methods are added to maintain the castor Uniprot mapping file
483 public void setStatus(String status)
485 setValue(STATUS, status);
488 public String getStatus()
490 return (String) getValue(STATUS);
493 public void setAttributes(String attr)
495 setValue(ATTRIBUTES, attr);
498 public String getAttributes()
500 return (String) getValue(ATTRIBUTES);
504 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
505 * GFF), and 0 for unknown or not (validly) specified
509 public int getStrand()
512 if (otherDetails != null)
514 Object str = otherDetails.get(STRAND);
519 else if ("+".equals(str))
528 * Set the value of strand
531 * should be "+" for forward, or "-" for reverse
533 public void setStrand(String strand)
535 setValue(STRAND, strand);
538 public void setPhase(String phase)
540 setValue(PHASE, phase);
543 public String getPhase()
545 return (String) getValue(PHASE);
549 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
553 public void setEnaLocation(String loc)
555 setValue(LOCATION, loc);
559 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
563 public String getEnaLocation()
565 return (String) getValue(LOCATION);
569 * Readable representation, for debug only, not guaranteed not to change
573 public String toString()
575 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
580 * Overridden to ensure that whenever two objects are equal, they have the
584 public int hashCode()
586 String s = getType() + getDescription() + getFeatureGroup()
587 + getValue("ID") + getValue("Name") + getValue("Parent")
589 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
594 * Answers true if the feature's start/end values represent two related
595 * positions, rather than ends of a range. Such features may be visualised or
596 * reported differently to features on a range.
599 public boolean isContactFeature()
601 return contactFeature;
605 * Answers true if the sequence has zero start and end position
609 public boolean isNonPositional()
611 return begin == 0 && end == 0;
615 * Answers an html-formatted report of feature details
619 public String getDetailsReport()
621 FeatureSourceI metadata = FeatureSources.getInstance()
624 StringBuilder sb = new StringBuilder(128);
626 sb.append("<table>");
627 sb.append(String.format(ROW_DATA, "Type", type, ""));
628 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
629 : begin + (isContactFeature() ? ":" : "-") + end, ""));
630 String desc = StringUtils.stripHtmlTags(description);
631 sb.append(String.format(ROW_DATA, "Description", desc, ""));
632 if (!Float.isNaN(score) && score != 0f)
634 sb.append(String.format(ROW_DATA, "Score", score, ""));
636 if (featureGroup != null)
638 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
641 if (otherDetails != null)
643 TreeMap<String, Object> ordered = new TreeMap<>(
644 String.CASE_INSENSITIVE_ORDER);
645 ordered.putAll(otherDetails);
647 for (Entry<String, Object> entry : ordered.entrySet())
649 String key = entry.getKey();
650 if (ATTRIBUTES.equals(key))
652 continue; // to avoid double reporting
655 Object value = entry.getValue();
656 if (value instanceof Map<?, ?>)
659 * expand values in a Map attribute across separate lines
660 * copy to a TreeMap for alphabetical ordering
662 Map<String, Object> values = (Map<String, Object>) value;
663 SortedMap<String, Object> sm = new TreeMap<>(
664 String.CASE_INSENSITIVE_ORDER);
666 for (Entry<?, ?> e : sm.entrySet())
668 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e
669 .getValue().toString()));
674 // tried <td title="key"> but it failed to provide a tooltip :-(
675 String attDesc = null;
676 if (metadata != null)
678 attDesc = metadata.getAttributeName(key);
680 String s = entry.getValue().toString();
681 if (isValueInteresting(key, s, metadata))
683 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
689 sb.append("</table>");
691 String text = sb.toString();
696 * Answers true if we judge the value is worth displaying, by some heuristic
704 boolean isValueInteresting(String key, String value,
705 FeatureSourceI metadata)
708 * currently suppressing zero values as well as null or empty
710 if (value == null || "".equals(value) || ".".equals(value)
711 || "0".equals(value))
716 if (metadata == null)
721 FeatureAttributeType attType = metadata.getAttributeType(key);
723 && (attType == FeatureAttributeType.Float || attType
724 .equals(FeatureAttributeType.Integer)))
728 float fval = Float.valueOf(value);
733 } catch (NumberFormatException e)
739 return true; // default to interesting
743 * Sets the feature source identifier
747 public void setSource(String theSource)
753 public IntervalI getContainedBy()
759 public void setContainedBy(IntervalI containedBy)
761 this.containedBy = (SequenceFeature) containedBy;
767 class SFSortByEnd implements Comparator<SequenceFeature>
770 public int compare(SequenceFeature a, SequenceFeature b)
772 return a.getEnd() - b.getEnd();
776 class SFSortByBegin implements Comparator<SequenceFeature>
779 public int compare(SequenceFeature a, SequenceFeature b)
781 return a.getBegin() - b.getBegin();