2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureAttributes;
25 import jalview.datamodel.features.FeatureLocationI;
26 import jalview.datamodel.features.FeatureSourceI;
27 import jalview.datamodel.features.FeatureSources;
28 import jalview.util.StringUtils;
30 import java.util.Comparator;
31 import java.util.HashMap;
33 import java.util.Map.Entry;
34 import java.util.SortedMap;
35 import java.util.TreeMap;
36 import java.util.Vector;
39 * A class that models a single contiguous feature on a sequence. If flag
40 * 'contactFeature' is true, the start and end positions are interpreted instead
41 * as two contact points.
43 public class SequenceFeature implements FeatureLocationI
46 * score value if none is set; preferably Float.NaN, but see
47 * JAL-2060 and JAL-2554 for a couple of blockers to that
49 private static final float NO_SCORE = 0f;
51 private static final String STATUS = "status";
53 private static final String STRAND = "STRAND";
55 // private key for Phase designed not to conflict with real GFF data
56 private static final String PHASE = "!Phase";
58 // private key for ENA location designed not to conflict with real GFF data
59 private static final String LOCATION = "!Location";
61 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
64 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
65 * name1=value1;name2=value2,value3;...etc
67 private static final String ATTRIBUTES = "ATTRIBUTES";
70 * type, begin, end, featureGroup, score and contactFeature are final
71 * to ensure that the integrity of SequenceFeatures data store
72 * can't be broken by direct update of these fields
74 public final String type;
76 public final int begin;
80 public final String featureGroup;
82 public final float score;
84 private final boolean contactFeature;
86 public String description;
89 * a map of key-value pairs; may be populated from GFF 'column 9' data,
90 * other data sources (e.g. GenBank file), or programmatically
92 public Map<String, Object> otherDetails;
94 public Vector<String> links;
97 * the identifier (if known) for the FeatureSource held in FeatureSources,
98 * as a provider of metadata about feature attributes
100 private String source;
/**
 * Constructs a duplicate of the given feature. Note: this makes a shallow
 * copy of the otherDetails map, so the new and original SequenceFeature may
 * reference the same objects in the map.
 * 
 * @param cpy
 *          the feature to duplicate
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature without an explicit score; the score is set to
 * NO_SCORE.
 * 
 * @param theType
 * @param theDesc
 * @param theBegin
 * @param theEnd
 * @param group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
131 * Constructor including a score value
140 public SequenceFeature(String theType, String theDesc, int theBegin,
141 int theEnd, float theScore, String group)
144 this.description = theDesc;
145 this.begin = theBegin;
147 this.featureGroup = group;
148 this.score = theScore;
151 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
153 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
154 || "disulphide bond".equalsIgnoreCase(type);
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The description, source, attributes and links are taken from the feature
 * being copied; type, range, group and score are as supplied.
 * 
 * @param sf
 *          the feature to copy
 * @param newType
 * @param newBegin
 * @param newEnd
 * @param newGroup
 * @param newScore
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  this(newType, sf.getDescription(), newBegin, newEnd, newScore, newGroup);

  this.source = sf.source;

  if (sf.otherDetails != null)
  {
    // shallow copy: a new map, holding the same value objects as sf's map
    otherDetails = new HashMap<>(sf.otherDetails);
  }

  // links stays null unless the copied feature has at least one link
  if (sf.links != null && !sf.links.isEmpty())
  {
    links = new Vector<>(sf.links);
  }
}
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * keeping the type of the feature being copied.
 * 
 * @param sf
 *          the feature to copy
 * @param newBegin
 * @param newEnd
 * @param newGroup
 * @param newScore
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
209 * Two features are considered equal if they have the same type, group,
210 * description, start, end, phase, strand, and (if present) 'Name', ID' and
211 * 'Parent' attributes.
213 * Note we need to check Parent to distinguish the same exon occurring in
214 * different transcripts (in Ensembl GFF). This allows assembly of transcript
215 * sequences from their component exon regions.
218 public boolean equals(Object o)
220 return (o instanceof SequenceFeature
221 && equalsWithParent((SequenceFeature) o));
225 * BH 2019.09.22 required due to subclassing by CrossRef and used by
226 * intervalstore.nonc.IntervalStoreImpl
231 public boolean equalsWithParent(SequenceFeature sf)
233 return equals(sf, false);
237 * Overloaded method allows the equality test to optionally ignore the
238 * 'Parent' attribute of a feature. This supports avoiding adding many
239 * superficially duplicate 'exon' or CDS features to genomic or protein
243 * @param ignoreParent
246 public boolean equals(SequenceFeature sf, boolean ignoreParent)
248 return (begin == sf.begin && end == sf.end
249 && getStrand() == sf.getStrand()
250 && (Float.isNaN(score) ? Float.isNaN(sf.score)
252 && (type + description + featureGroup + getPhase())
253 .equals(sf.type + sf.description + sf.featureGroup
255 && equalAttribute(getValue("ID"), sf.getValue("ID"))
256 && equalAttribute(getValue("Name"), sf.getValue("Name"))
257 && (ignoreParent || equalAttribute(getValue("Parent"),
258 sf.getValue("Parent"))));
262 * Returns true if both values are null, are both non-null and equal
268 protected static boolean equalAttribute(Object att1, Object att2)
270 if (att1 == null && att2 == null)
276 return att1.equals(att2);
278 return att2.equals(att1);
284 * @return DOCUMENT ME!
287 public int getBegin()
295 * @return DOCUMENT ME!
306 * @return DOCUMENT ME!
308 public String getType()
316 * @return DOCUMENT ME!
318 public String getDescription()
323 public void setDescription(String desc)
328 public String getFeatureGroup()
333 public void addLink(String labelLink)
337 links = new Vector<>();
340 if (!links.contains(labelLink))
342 links.insertElementAt(labelLink, 0);
346 public float getScore()
352 * Used for getting values which are not in the basic set. eg STRAND, PHASE
358 public Object getValue(String key)
360 if (otherDetails == null)
366 return otherDetails.get(key);
371 * Answers the value of the specified attribute as string, or null if no such
372 * value. If more than one attribute name is provided, tries to resolve as keys
373 * to nested maps. For example, if attribute "CSQ" holds a map of key-value
374 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele"
380 public String getValueAsString(String... key)
382 if (otherDetails == null)
386 Object value = otherDetails.get(key[0]);
387 if (key.length > 1 && value instanceof Map<?, ?>)
389 value = ((Map) value).get(key[1]);
391 return value == null ? null : value.toString();
395 * Returns a property value for the given key if known, else the specified
399 * @param defaultValue
402 public Object getValue(String key, Object defaultValue)
404 Object value = getValue(key);
405 return value == null ? defaultValue : value;
409 * Used for setting values which are not in the basic set. eg STRAND, FRAME
417 public void setValue(String key, Object value)
421 if (otherDetails == null)
423 otherDetails = new HashMap<>();
426 otherDetails.put(key, value);
427 recordAttribute(key, value);
432 * Notifies the addition of a feature attribute. This lets us keep track of
433 * which attributes are present on each feature type, and also the range of
434 * numerical-valued attributes.
439 protected void recordAttribute(String key, Object value)
441 String attDesc = null;
444 attDesc = FeatureSources.getInstance().getSource(source)
445 .getAttributeName(key);
448 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
453 * The following methods are added to maintain the castor Uniprot mapping file
456 public void setStatus(String status)
458 setValue(STATUS, status);
461 public String getStatus()
463 return (String) getValue(STATUS);
466 public void setAttributes(String attr)
468 setValue(ATTRIBUTES, attr);
471 public String getAttributes()
473 return (String) getValue(ATTRIBUTES);
477 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
478 * GFF), and 0 for unknown or not (validly) specified
482 public int getStrand()
485 if (otherDetails != null)
487 Object str = otherDetails.get(STRAND);
492 else if ("+".equals(str))
501 * Set the value of strand
504 * should be "+" for forward, or "-" for reverse
506 public void setStrand(String strand)
508 setValue(STRAND, strand);
511 public void setPhase(String phase)
513 setValue(PHASE, phase);
516 public String getPhase()
518 return (String) getValue(PHASE);
522 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
526 public void setEnaLocation(String loc)
528 setValue(LOCATION, loc);
532 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
536 public String getEnaLocation()
538 return (String) getValue(LOCATION);
542 * Readable representation, for debug only, not guaranteed not to change
546 public String toString()
548 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
553 * Overridden to ensure that whenever two objects are equal, they have the
557 public int hashCode()
559 String s = getType() + getDescription() + getFeatureGroup()
560 + getValue("ID") + getValue("Name") + getValue("Parent")
562 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
567 * Answers true if the feature's start/end values represent two related
568 * positions, rather than ends of a range. Such features may be visualised or
569 * reported differently to features on a range.
572 public boolean isContactFeature()
574 return contactFeature;
578 * Answers true if the sequence has zero start and end position
582 public boolean isNonPositional()
584 return begin == 0 && end == 0;
588 * Answers an html-formatted report of feature details
592 public String getDetailsReport()
594 FeatureSourceI metadata = FeatureSources.getInstance()
597 StringBuilder sb = new StringBuilder(128);
599 sb.append("<table>");
600 sb.append(String.format(ROW_DATA, "Type", type, ""));
601 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
602 : begin + (isContactFeature() ? ":" : "-") + end, ""));
603 String desc = StringUtils.stripHtmlTags(description);
604 sb.append(String.format(ROW_DATA, "Description", desc, ""));
605 if (!Float.isNaN(score) && score != 0f)
607 sb.append(String.format(ROW_DATA, "Score", score, ""));
609 if (featureGroup != null)
611 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
614 if (otherDetails != null)
616 TreeMap<String, Object> ordered = new TreeMap<>(
617 String.CASE_INSENSITIVE_ORDER);
618 ordered.putAll(otherDetails);
620 for (Entry<String, Object> entry : ordered.entrySet())
622 String key = entry.getKey();
623 if (ATTRIBUTES.equals(key))
625 continue; // to avoid double reporting
628 Object value = entry.getValue();
629 if (value instanceof Map<?, ?>)
632 * expand values in a Map attribute across separate lines
633 * copy to a TreeMap for alphabetical ordering
635 Map<String, Object> values = (Map<String, Object>) value;
636 SortedMap<String, Object> sm = new TreeMap<>(
637 String.CASE_INSENSITIVE_ORDER);
639 for (Entry<?, ?> e : sm.entrySet())
641 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e
642 .getValue().toString()));
647 // tried <td title="key"> but it failed to provide a tooltip :-(
648 String attDesc = null;
649 if (metadata != null)
651 attDesc = metadata.getAttributeName(key);
653 String s = entry.getValue().toString();
654 if (isValueInteresting(key, s, metadata))
656 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
662 sb.append("</table>");
664 String text = sb.toString();
669 * Answers true if we judge the value is worth displaying, by some heuristic
677 boolean isValueInteresting(String key, String value,
678 FeatureSourceI metadata)
681 * currently suppressing zero values as well as null or empty
683 if (value == null || "".equals(value) || ".".equals(value)
684 || "0".equals(value))
689 if (metadata == null)
694 FeatureAttributeType attType = metadata.getAttributeType(key);
696 && (attType == FeatureAttributeType.Float || attType
697 .equals(FeatureAttributeType.Integer)))
701 float fval = Float.valueOf(value);
706 } catch (NumberFormatException e)
712 return true; // default to interesting
716 * Sets the feature source identifier
720 public void setSource(String theSource)
728 class SFSortByEnd implements Comparator<SequenceFeature>
731 public int compare(SequenceFeature a, SequenceFeature b)
733 return a.getEnd() - b.getEnd();
737 class SFSortByBegin implements Comparator<SequenceFeature>
740 public int compare(SequenceFeature a, SequenceFeature b)
742 return a.getBegin() - b.getBegin();