2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureAttributes;
25 import jalview.datamodel.features.FeatureLocationI;
26 import jalview.datamodel.features.FeatureSourceI;
27 import jalview.datamodel.features.FeatureSources;
28 import jalview.util.StringUtils;
30 import java.util.Comparator;
31 import java.util.HashMap;
33 import java.util.Map.Entry;
34 import java.util.SortedMap;
35 import java.util.TreeMap;
36 import java.util.Vector;
38 import intervalstore.api.IntervalI;
41 * A class that models a single contiguous feature on a sequence. If flag
42 * 'contactFeature' is true, the start and end positions are interpreted instead
43 * as two contact points.
45 public class SequenceFeature implements FeatureLocationI
48 * score value if none is set; preferably Float.NaN, but see
49 * JAL-2060 and JAL-2554 for a couple of blockers to that
51 private static final float NO_SCORE = 0f;
53 private static final String STATUS = "status";
55 private static final String STRAND = "STRAND";
57 // private key for Phase designed not to conflict with real GFF data
58 private static final String PHASE = "!Phase";
60 // private key for ENA location designed not to conflict with real GFF data
61 private static final String LOCATION = "!Location";
63 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
66 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
67 * name1=value1;name2=value2,value3;...etc
69 private static final String ATTRIBUTES = "ATTRIBUTES";
72 * type, begin, end, featureGroup, score and contactFeature are final
73 * to ensure that the integrity of SequenceFeatures data store
74 * can't be broken by direct update of these fields
76 public final String type;
78 public final int begin;
82 public final String featureGroup;
84 public final float score;
86 private final boolean contactFeature;
88 public String description;
91 * a map of key-value pairs; may be populated from GFF 'column 9' data,
92 * other data sources (e.g. GenBank file), or programmatically
94 public Map<String, Object> otherDetails;
96 public Vector<String> links;
99 * the identifier (if known) for the FeatureSource held in FeatureSources,
100 * as a provider of metadata about feature attributes
102 private String source;
105 * Constructs a duplicate feature. Note: makes a shallow copy of the
106 * otherDetails map, so the new and original SequenceFeature may reference the
107 * same objects in the map.
111 public SequenceFeature(SequenceFeature cpy)
113 this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy
/**
 * Constructs a feature without an explicit score. Delegates to the
 * constructor that takes a score, supplying NO_SCORE as the score value.
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
133 * Constructor including a score value
142 public SequenceFeature(String theType, String theDesc, int theBegin,
143 int theEnd, float theScore, String group)
146 this.description = theDesc;
147 this.begin = theBegin;
149 this.featureGroup = group;
150 this.score = theScore;
153 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
155 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
156 || "disulphide bond".equalsIgnoreCase(type);
160 * A copy constructor that allows the value of final fields to be 'modified'
169 public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
170 int newEnd, String newGroup, float newScore)
172 this(newType, sf.getDescription(), newBegin, newEnd, newScore,
175 this.source = sf.source;
177 if (sf.otherDetails != null)
179 otherDetails = new HashMap<>();
180 for (Entry<String, Object> entry : sf.otherDetails.entrySet())
182 otherDetails.put(entry.getKey(), entry.getValue());
185 if (sf.links != null && sf.links.size() > 0)
187 links = new Vector<>();
188 for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
190 links.addElement(sf.links.elementAt(i));
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The type of the new feature is taken from the feature being copied; begin,
 * end, group and score are supplied by the caller.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          begin position for the new feature
 * @param newEnd
 *          end position for the new feature
 * @param newGroup
 *          feature group for the new feature
 * @param newScore
 *          score for the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
211 * Two features are considered equal if they have the same type, group,
212 * description, start, end, phase, strand, and (if present) 'Name', ID' and
213 * 'Parent' attributes.
215 * Note we need to check Parent to distinguish the same exon occurring in
216 * different transcripts (in Ensembl GFF). This allows assembly of transcript
217 * sequences from their component exon regions.
220 public boolean equals(Object o)
222 return (o != null && (o instanceof SequenceFeature)
223 && equalsInterval((SequenceFeature) o));
227 * Having determined that this is in fact a SequenceFeature, now check it for
228 * equivalence. Overridden in CrossRef; used by IntervalStore (possibly).
231 public boolean equalsInterval(IntervalI sf)
233 return equals((SequenceFeature) sf, false);
236 * Overloaded method allows the equality test to optionally ignore the
237 * 'Parent' attribute of a feature. This supports avoiding adding many
238 * superficially duplicate 'exon' or CDS features to genomic or protein
242 * @param ignoreParent
245 public boolean equals(SequenceFeature sf, boolean ignoreParent)
247 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
249 if (begin != sf.begin || end != sf.end || !sameScore)
254 if (getStrand() != sf.getStrand())
259 if (!(type + description + featureGroup + getPhase()).equals(
260 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
264 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
268 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
274 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
283 * Returns true if both values are null, are both non-null and equal
289 protected static boolean equalAttribute(Object att1, Object att2)
291 if (att1 == null && att2 == null)
297 return att1.equals(att2);
299 return att2.equals(att1);
305 * @return DOCUMENT ME!
308 public int getBegin()
316 * @return DOCUMENT ME!
327 * @return DOCUMENT ME!
329 public String getType()
337 * @return DOCUMENT ME!
339 public String getDescription()
344 public void setDescription(String desc)
349 public String getFeatureGroup()
354 public void addLink(String labelLink)
358 links = new Vector<>();
361 if (!links.contains(labelLink))
363 links.insertElementAt(labelLink, 0);
367 public float getScore()
373 * Used for getting values which are not in the basic set. eg STRAND, PHASE
379 public Object getValue(String key)
381 if (otherDetails == null)
387 return otherDetails.get(key);
392 * Answers the value of the specified attribute as string, or null if no such
393 * value. If more than one attribute name is provided, tries to resolve as keys
394 * to nested maps. For example, if attribute "CSQ" holds a map of key-value
395 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele"
401 public String getValueAsString(String... key)
403 if (otherDetails == null)
407 Object value = otherDetails.get(key[0]);
408 if (key.length > 1 && value instanceof Map<?, ?>)
410 value = ((Map) value).get(key[1]);
412 return value == null ? null : value.toString();
416 * Returns a property value for the given key if known, else the specified
420 * @param defaultValue
423 public Object getValue(String key, Object defaultValue)
425 Object value = getValue(key);
426 return value == null ? defaultValue : value;
430 * Used for setting values which are not in the basic set. eg STRAND, FRAME
438 public void setValue(String key, Object value)
442 if (otherDetails == null)
444 otherDetails = new HashMap<>();
447 otherDetails.put(key, value);
448 recordAttribute(key, value);
453 * Notifies the addition of a feature attribute. This lets us keep track of
454 * which attributes are present on each feature type, and also the range of
455 * numerical-valued attributes.
460 protected void recordAttribute(String key, Object value)
462 String attDesc = null;
465 attDesc = FeatureSources.getInstance().getSource(source)
466 .getAttributeName(key);
469 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
474 * The following methods are added to maintain the castor Uniprot mapping file
477 public void setStatus(String status)
479 setValue(STATUS, status);
482 public String getStatus()
484 return (String) getValue(STATUS);
487 public void setAttributes(String attr)
489 setValue(ATTRIBUTES, attr);
492 public String getAttributes()
494 return (String) getValue(ATTRIBUTES);
498 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
499 * GFF), and 0 for unknown or not (validly) specified
503 public int getStrand()
506 if (otherDetails != null)
508 Object str = otherDetails.get(STRAND);
513 else if ("+".equals(str))
522 * Set the value of strand
525 * should be "+" for forward, or "-" for reverse
527 public void setStrand(String strand)
529 setValue(STRAND, strand);
532 public void setPhase(String phase)
534 setValue(PHASE, phase);
537 public String getPhase()
539 return (String) getValue(PHASE);
543 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
547 public void setEnaLocation(String loc)
549 setValue(LOCATION, loc);
553 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
557 public String getEnaLocation()
559 return (String) getValue(LOCATION);
563 * Readable representation, for debug only, not guaranteed not to change
567 public String toString()
569 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
574 * Overridden to ensure that whenever two objects are equal, they have the
578 public int hashCode()
580 String s = getType() + getDescription() + getFeatureGroup()
581 + getValue("ID") + getValue("Name") + getValue("Parent")
583 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
588 * Answers true if the feature's start/end values represent two related
589 * positions, rather than ends of a range. Such features may be visualised or
590 * reported differently to features on a range.
593 public boolean isContactFeature()
595 return contactFeature;
599 * Answers true if the sequence has zero start and end position
603 public boolean isNonPositional()
605 return begin == 0 && end == 0;
609 * Answers an html-formatted report of feature details
613 public String getDetailsReport()
615 FeatureSourceI metadata = FeatureSources.getInstance()
618 StringBuilder sb = new StringBuilder(128);
620 sb.append("<table>");
621 sb.append(String.format(ROW_DATA, "Type", type, ""));
622 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
623 : begin + (isContactFeature() ? ":" : "-") + end, ""));
624 String desc = StringUtils.stripHtmlTags(description);
625 sb.append(String.format(ROW_DATA, "Description", desc, ""));
626 if (!Float.isNaN(score) && score != 0f)
628 sb.append(String.format(ROW_DATA, "Score", score, ""));
630 if (featureGroup != null)
632 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
635 if (otherDetails != null)
637 TreeMap<String, Object> ordered = new TreeMap<>(
638 String.CASE_INSENSITIVE_ORDER);
639 ordered.putAll(otherDetails);
641 for (Entry<String, Object> entry : ordered.entrySet())
643 String key = entry.getKey();
644 if (ATTRIBUTES.equals(key))
646 continue; // to avoid double reporting
649 Object value = entry.getValue();
650 if (value instanceof Map<?, ?>)
653 * expand values in a Map attribute across separate lines
654 * copy to a TreeMap for alphabetical ordering
656 Map<String, Object> values = (Map<String, Object>) value;
657 SortedMap<String, Object> sm = new TreeMap<>(
658 String.CASE_INSENSITIVE_ORDER);
660 for (Entry<?, ?> e : sm.entrySet())
662 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e
663 .getValue().toString()));
668 // tried <td title="key"> but it failed to provide a tooltip :-(
669 String attDesc = null;
670 if (metadata != null)
672 attDesc = metadata.getAttributeName(key);
674 String s = entry.getValue().toString();
675 if (isValueInteresting(key, s, metadata))
677 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
683 sb.append("</table>");
685 String text = sb.toString();
690 * Answers true if we judge the value is worth displaying, by some heuristic
698 boolean isValueInteresting(String key, String value,
699 FeatureSourceI metadata)
702 * currently suppressing zero values as well as null or empty
704 if (value == null || "".equals(value) || ".".equals(value)
705 || "0".equals(value))
710 if (metadata == null)
715 FeatureAttributeType attType = metadata.getAttributeType(key);
717 && (attType == FeatureAttributeType.Float || attType
718 .equals(FeatureAttributeType.Integer)))
722 float fval = Float.valueOf(value);
727 } catch (NumberFormatException e)
733 return true; // default to interesting
737 * Sets the feature source identifier
741 public void setSource(String theSource)
749 class SFSortByEnd implements Comparator<SequenceFeature>
752 public int compare(SequenceFeature a, SequenceFeature b)
754 return a.getEnd() - b.getEnd();
758 class SFSortByBegin implements Comparator<SequenceFeature>
761 public int compare(SequenceFeature a, SequenceFeature b)
763 return a.getBegin() - b.getBegin();