2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureAttributes;
25 import jalview.datamodel.features.FeatureLocationI;
26 import jalview.datamodel.features.FeatureSourceI;
27 import jalview.datamodel.features.FeatureSources;
28 import jalview.util.StringUtils;
30 import java.util.Comparator;
31 import java.util.HashMap;
33 import java.util.Map.Entry;
34 import java.util.SortedMap;
35 import java.util.TreeMap;
36 import java.util.Vector;
38 import intervalstore.api.IntervalI;
41 * A class that models a single contiguous feature on a sequence. If flag
42 * 'contactFeature' is true, the start and end positions are interpreted instead
43 * as two contact points.
45 public class SequenceFeature implements FeatureLocationI
48 * score value if none is set; preferably Float.NaN, but see
49 * JAL-2060 and JAL-2554 for a couple of blockers to that
51 private static final float NO_SCORE = 0f;
53 private static final String STATUS = "status";
55 private static final String STRAND = "STRAND";
57 // private key for Phase designed not to conflict with real GFF data
58 private static final String PHASE = "!Phase";
60 // private key for ENA location designed not to conflict with real GFF data
61 private static final String LOCATION = "!Location";
63 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
66 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
67 * name1=value1;name2=value2,value3;...etc
69 private static final String ATTRIBUTES = "ATTRIBUTES";
72 * type, begin, end, featureGroup, score and contactFeature are final
73 * to ensure that the integrity of SequenceFeatures data store
74 * can't be broken by direct update of these fields
76 public final String type;
78 public final int begin;
82 public final String featureGroup;
84 public final float score;
86 private final boolean contactFeature;
88 public String description;
91 * a map of key-value pairs; may be populated from GFF 'column 9' data,
92 * other data sources (e.g. GenBank file), or programmatically
94 public Map<String, Object> otherDetails;
96 public Vector<String> links;
99 * the identifier (if known) for the FeatureSource held in FeatureSources,
100 * as a provider of metadata about feature attributes
102 private String source;
/**
 * Constructs a duplicate of the given feature, keeping its begin, end,
 * feature group and score. Delegates to the copy constructor that accepts
 * replacement values for those fields.
 * 
 * @param cpy
 *          the feature to copy
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy,
          cpy.getBegin(),
          cpy.getEnd(),
          cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature without an explicit score; the score is set to
 * NO_SCORE.
 * 
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType,
          theDesc,
          theBegin,
          theEnd,
          NO_SCORE,
          group);
}
133 * Constructor including a score value
142 public SequenceFeature(String theType, String theDesc, int theBegin,
143 int theEnd, float theScore, String group)
146 this.description = theDesc;
147 this.begin = theBegin;
149 this.featureGroup = group;
150 this.score = theScore;
153 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
155 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
156 || "disulphide bond".equalsIgnoreCase(type);
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The description and source are taken from the original feature. The
 * otherDetails map and the links are copied into new collections, but the
 * copies are shallow: both features reference the same value objects.
 * 
 * @param sf
 *          the feature to copy
 * @param newType
 *          the type for the new feature
 * @param newBegin
 *          the begin position for the new feature
 * @param newEnd
 *          the end position for the new feature
 * @param newGroup
 *          the feature group for the new feature
 * @param newScore
 *          the score for the new feature
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  this(newType, sf.getDescription(), newBegin, newEnd, newScore, newGroup);

  this.source = sf.source;

  if (sf.otherDetails != null)
  {
    // new map, same key and value objects
    otherDetails = new HashMap<>(sf.otherDetails);
  }

  // links stays null when the original has none
  if (sf.links != null && !sf.links.isEmpty())
  {
    links = new Vector<>(sf.links);
  }
}
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * while keeping the type of the original feature.
 * 
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          the begin position for the new feature
 * @param newEnd
 *          the end position for the new feature
 * @param newGroup
 *          the feature group for the new feature
 * @param newScore
 *          the score for the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf,
          sf.getType(),
          newBegin,
          newEnd,
          newGroup,
          newScore);
}
211 * Two features are considered equal if they have the same type, group,
212 * description, start, end, phase, strand, and (if present) 'Name', ID' and
213 * 'Parent' attributes.
215 * Note we need to check Parent to distinguish the same exon occurring in
216 * different transcripts (in Ensembl GFF). This allows assembly of transcript
217 * sequences from their component exon regions.
220 public boolean equals(Object o)
222 return (o != null && (o instanceof SequenceFeature)
223 && equalsInterval((SequenceFeature) o));
227 * Having determined that this is in fact a SequenceFeature, now check it for
228 * equivalence. Overridden in CrossRef; used by IntervalStore (possibly).
231 public boolean equalsInterval(IntervalI sf)
233 return sf != null && equals((SequenceFeature) sf, false);
236 * Overloaded method allows the equality test to optionally ignore the
237 * 'Parent' attribute of a feature. This supports avoiding adding many
238 * superficially duplicate 'exon' or CDS features to genomic or protein
242 * @param ignoreParent
245 public boolean equals(SequenceFeature sf, boolean ignoreParent)
247 return (begin == sf.begin && end == sf.end
248 && getStrand() == sf.getStrand()
249 && (Float.isNaN(score) ? Float.isNaN(sf.score)
251 && (type + description + featureGroup + getPhase())
252 .equals(sf.type + sf.description + sf.featureGroup
254 && equalAttribute(getValue("ID"), sf.getValue("ID"))
255 && equalAttribute(getValue("Name"), sf.getValue("Name"))
256 && (ignoreParent || equalAttribute(getValue("Parent"),
257 sf.getValue("Parent"))));
261 * Returns true if both values are null, are both non-null and equal
267 protected static boolean equalAttribute(Object att1, Object att2)
269 if (att1 == null && att2 == null)
275 return att1.equals(att2);
277 return att2.equals(att1);
283 * @return DOCUMENT ME!
286 public int getBegin()
294 * @return DOCUMENT ME!
305 * @return DOCUMENT ME!
307 public String getType()
315 * @return DOCUMENT ME!
317 public String getDescription()
322 public void setDescription(String desc)
327 public String getFeatureGroup()
332 public void addLink(String labelLink)
336 links = new Vector<>();
339 if (!links.contains(labelLink))
341 links.insertElementAt(labelLink, 0);
345 public float getScore()
351 * Used for getting values which are not in the basic set. eg STRAND, PHASE
357 public Object getValue(String key)
359 if (otherDetails == null)
365 return otherDetails.get(key);
370 * Answers the value of the specified attribute as string, or null if no such
371 * value. If more than one attribute name is provided, tries to resolve as keys
372 * to nested maps. For example, if attribute "CSQ" holds a map of key-value
373 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele"
379 public String getValueAsString(String... key)
381 if (otherDetails == null)
385 Object value = otherDetails.get(key[0]);
386 if (key.length > 1 && value instanceof Map<?, ?>)
388 value = ((Map) value).get(key[1]);
390 return value == null ? null : value.toString();
394 * Returns a property value for the given key if known, else the specified
398 * @param defaultValue
401 public Object getValue(String key, Object defaultValue)
403 Object value = getValue(key);
404 return value == null ? defaultValue : value;
408 * Used for setting values which are not in the basic set. eg STRAND, FRAME
// Stores the key/value pair in otherDetails, creating the map on first use,
// and then passes the same pair to recordAttribute.
// NOTE(review): original lines 417-419 are not visible in this view, so any
// guard wrapping the statements below (for example a null test on value)
// cannot be confirmed from here.
416 public void setValue(String key, Object value)
420 if (otherDetails == null)
// map is created lazily, only when the first attribute is set
422 otherDetails = new HashMap<>();
425 otherDetails.put(key, value);
// notify the attribute registry (see recordAttribute)
426 recordAttribute(key, value);
431 * Notifies the addition of a feature attribute. This lets us keep track of
432 * which attributes are present on each feature type, and also the range of
433 * numerical-valued attributes.
// Reports an attribute of this feature to FeatureAttributes, passing this
// feature's type, an attribute description and the value.
// NOTE(review): the condition guarding the lookup below (original lines
// 441-442) and the final argument of addAttribute (line 448) are not visible
// in this view.
438 protected void recordAttribute(String key, Object value)
440 String attDesc = null;
// description is looked up from the FeatureSource registered under 'source'
443 attDesc = FeatureSources.getInstance().getSource(source)
444 .getAttributeName(key);
// attDesc is still null here if the lookup above was not performed
447 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
452 * The following methods are added to maintain the castor Uniprot mapping file
455 public void setStatus(String status)
457 setValue(STATUS, status);
460 public String getStatus()
462 return (String) getValue(STATUS);
465 public void setAttributes(String attr)
467 setValue(ATTRIBUTES, attr);
470 public String getAttributes()
472 return (String) getValue(ATTRIBUTES);
476 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
477 * GFF), and 0 for unknown or not (validly) specified
481 public int getStrand()
484 if (otherDetails != null)
486 Object str = otherDetails.get(STRAND);
491 else if ("+".equals(str))
500 * Set the value of strand
503 * should be "+" for forward, or "-" for reverse
505 public void setStrand(String strand)
507 setValue(STRAND, strand);
510 public void setPhase(String phase)
512 setValue(PHASE, phase);
515 public String getPhase()
517 return (String) getValue(PHASE);
521 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
525 public void setEnaLocation(String loc)
527 setValue(LOCATION, loc);
531 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
535 public String getEnaLocation()
537 return (String) getValue(LOCATION);
541 * Readable representation, for debug only, not guaranteed not to change
// Formats begin, end, type and one further string value, separated by
// single spaces.
// NOTE(review): the fourth format argument (original line 548) is not
// visible in this view.
545 public String toString()
547 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
552 * Overridden to ensure that whenever two objects are equal, they have the
// Combines the hash of a string built from type, description, feature group
// and the 'ID', 'Name' and 'Parent' attributes with begin, end and the score
// truncated to int. A null value contributes the text "null" to the string.
// NOTE(review): the continuation lines of both statements (original lines
// 560 and 562) are not visible in this view, so further terms may be
// included.
556 public int hashCode()
558 String s = getType() + getDescription() + getFeatureGroup()
559 + getValue("ID") + getValue("Name") + getValue("Parent")
561 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
566 * Answers true if the feature's start/end values represent two related
567 * positions, rather than ends of a range. Such features may be visualised or
568 * reported differently to features on a range.
571 public boolean isContactFeature()
573 return contactFeature;
577 * Answers true if the sequence has zero start and end position
581 public boolean isNonPositional()
583 return begin == 0 && end == 0;
587 * Answers an html-formatted report of feature details
// Builds an html table describing this feature, one ROW_DATA row per item:
// type, start/end, description, then score (only if not NaN and not zero),
// group (only if not null), followed by rows for the entries of otherDetails.
// NOTE(review): several original lines (e.g. 594, 597, 637, 656, 664) are not
// visible in this view; comments below describe only the visible statements.
591 public String getDetailsReport()
593 FeatureSourceI metadata = FeatureSources.getInstance()
596 StringBuilder sb = new StringBuilder(128);
598 sb.append("<table>");
599 sb.append(String.format(ROW_DATA, "Type", type, ""));
// a single position is shown once; otherwise begin and end are joined
// by ':' for a contact feature or '-' for a range
600 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
601 : begin + (isContactFeature() ? ":" : "-") + end, ""));
// the description is reported with html tags stripped
602 String desc = StringUtils.stripHtmlTags(description);
603 sb.append(String.format(ROW_DATA, "Description", desc, ""));
604 if (!Float.isNaN(score) && score != 0f)
606 sb.append(String.format(ROW_DATA, "Score", score, ""));
608 if (featureGroup != null)
610 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
613 if (otherDetails != null)
// copy to a map sorted by key, ignoring case, so rows are in key order
// NOTE(review): keys that differ only by case collapse to a single entry
// in a TreeMap using CASE_INSENSITIVE_ORDER
615 TreeMap<String, Object> ordered = new TreeMap<>(
616 String.CASE_INSENSITIVE_ORDER);
617 ordered.putAll(otherDetails);
619 for (Entry<String, Object> entry : ordered.entrySet())
621 String key = entry.getKey();
622 if (ATTRIBUTES.equals(key))
624 continue; // to avoid double reporting
627 Object value = entry.getValue();
628 if (value instanceof Map<?, ?>)
631 * expand values in a Map attribute across separate lines
632 * copy to a TreeMap for alphabetical ordering
634 Map<String, Object> values = (Map<String, Object>) value;
635 SortedMap<String, Object> sm = new TreeMap<>(
636 String.CASE_INSENSITIVE_ORDER);
// NOTE(review): the statement that populates sm from values (original
// line 637) is not visible in this view
// one row per nested entry: attribute name, nested key, nested value
// NOTE(review): e.getValue().toString() throws NullPointerException if
// the nested map holds a null value
638 for (Entry<?, ?> e : sm.entrySet())
640 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e
641 .getValue().toString()));
646 // tried <td title="key"> but it failed to provide a tooltip :-(
647 String attDesc = null;
// attribute description is only available if a feature source was found
648 if (metadata != null)
650 attDesc = metadata.getAttributeName(key);
652 String s = entry.getValue().toString();
// only values judged worth showing by isValueInteresting get a row
653 if (isValueInteresting(key, s, metadata))
655 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
661 sb.append("</table>");
663 String text = sb.toString();
668 * Answers true if we judge the value is worth displaying, by some heuristic
// Heuristic used by getDetailsReport to decide whether an attribute value is
// worth displaying. The visible tests are: the value is null, empty, "." or
// "0"; no metadata is available; and, for attributes typed Float or Integer,
// the value parsed as a float. The fall-through answer is true.
// NOTE(review): the return statements for the first two tests and the test
// applied to the parsed number (original lines 684-691 and 701-704) are not
// visible in this view.
676 boolean isValueInteresting(String key, String value,
677 FeatureSourceI metadata)
680 * currently suppressing zero values as well as null or empty
682 if (value == null || "".equals(value) || ".".equals(value)
683 || "0".equals(value))
688 if (metadata == null)
// the attribute's declared type decides whether a numeric check applies
693 FeatureAttributeType attType = metadata.getAttributeType(key);
695 && (attType == FeatureAttributeType.Float || attType
696 .equals(FeatureAttributeType.Integer)))
700 float fval = Float.valueOf(value);
// a value that does not parse as a number is caught here
705 } catch (NumberFormatException e)
711 return true; // default to interesting
715 * Sets the feature source identifier
719 public void setSource(String theSource)
727 class SFSortByEnd implements Comparator<SequenceFeature>
730 public int compare(SequenceFeature a, SequenceFeature b)
732 return a.getEnd() - b.getEnd();
736 class SFSortByBegin implements Comparator<SequenceFeature>
739 public int compare(SequenceFeature a, SequenceFeature b)
741 return a.getBegin() - b.getBegin();