2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureAttributes;
25 import jalview.datamodel.features.FeatureLocationI;
26 import jalview.datamodel.features.FeatureSourceI;
27 import jalview.datamodel.features.FeatureSources;
28 import jalview.util.StringUtils;
30 import java.util.Comparator;
31 import java.util.HashMap;
33 import java.util.Map.Entry;
34 import java.util.SortedMap;
35 import java.util.TreeMap;
36 import java.util.Vector;
39 * A class that models a single contiguous feature on a sequence. If flag
40 * 'contactFeature' is true, the start and end positions are interpreted instead
41 * as two contact points.
43 public class SequenceFeature implements FeatureLocationI
46 * score value if none is set; preferably Float.NaN, but see
47 * JAL-2060 and JAL-2554 for a couple of blockers to that
49 private static final float NO_SCORE = 0f;
51 private static final String STATUS = "status";
53 private static final String STRAND = "STRAND";
55 // private key for Phase designed not to conflict with real GFF data
56 private static final String PHASE = "!Phase";
58 // private key for ENA location designed not to conflict with real GFF data
59 private static final String LOCATION = "!Location";
61 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
64 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
65 * name1=value1;name2=value2,value3;...etc
67 private static final String ATTRIBUTES = "ATTRIBUTES";
70 * type, begin, end, featureGroup, score and contactFeature are final
71 * to ensure that the integrity of SequenceFeatures data store
72 * can't be broken by direct update of these fields
74 public final String type;
76 public final int begin;
80 public final String featureGroup;
82 public final float score;
84 private final boolean contactFeature;
86 public String description;
89 * a map of key-value pairs; may be populated from GFF 'column 9' data,
90 * other data sources (e.g. GenBank file), or programmatically
92 public Map<String, Object> otherDetails;
94 public Vector<String> links;
97 * the identifier (if known) for the FeatureSource held in FeatureSources,
98 * as a provider of metadata about feature attributes
100 private String source;
102 // for Overview sort:
105 public SequenceFeature containedBy;
/**
 * Constructs a duplicate of the given feature, with the same begin, end,
 * feature group and score. Note: this makes a shallow copy of the
 * otherDetails map, so the new and original SequenceFeature may reference the
 * same objects in the map.
 *
 * @param cpy
 *          the feature to duplicate
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature without an explicit score; the score field is set to
 * the class's NO_SCORE placeholder.
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
136 * Constructor including a score value
145 public SequenceFeature(String theType, String theDesc, int theBegin,
146 int theEnd, float theScore, String group)
149 this.description = theDesc;
150 this.begin = theBegin;
152 this.featureGroup = group;
153 this.score = theScore;
156 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
158 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
159 || "disulphide bond".equalsIgnoreCase(type);
163 * A copy constructor that allows the value of final fields to be 'modified'
172 public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
173 int newEnd, String newGroup, float newScore)
175 this(newType, sf.getDescription(), newBegin, newEnd, newScore,
178 this.source = sf.source;
180 if (sf.otherDetails != null)
182 otherDetails = new HashMap<>();
183 for (Entry<String, Object> entry : sf.otherDetails.entrySet())
185 otherDetails.put(entry.getKey(), entry.getValue());
188 if (sf.links != null && sf.links.size() > 0)
190 links = new Vector<>();
191 for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
193 links.addElement(sf.links.elementAt(i));
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * keeping the original feature's type.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          the begin position for the new feature
 * @param newEnd
 *          the end position for the new feature
 * @param newGroup
 *          the feature group for the new feature
 * @param newScore
 *          the score for the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
214 * Two features are considered equal if they have the same type, group,
215 * description, start, end, phase, strand, and (if present) 'Name', ID' and
216 * 'Parent' attributes.
218 * Note we need to check Parent to distinguish the same exon occurring in
219 * different transcripts (in Ensembl GFF). This allows assembly of transcript
220 * sequences from their component exon regions.
223 public boolean equals(Object o)
225 return equals(o, false);
229 * Overloaded method allows the equality test to optionally ignore the
230 * 'Parent' attribute of a feature. This supports avoiding adding many
231 * superficially duplicate 'exon' or CDS features to genomic or protein
235 * @param ignoreParent
238 public boolean equals(Object o, boolean ignoreParent)
240 if (o == null || !(o instanceof SequenceFeature))
245 SequenceFeature sf = (SequenceFeature) o;
246 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
248 if (begin != sf.begin || end != sf.end || !sameScore)
253 if (getStrand() != sf.getStrand())
258 if (!(type + description + featureGroup + getPhase()).equals(
259 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
263 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
267 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
273 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
282 * Returns true if both values are null, are both non-null and equal
288 protected static boolean equalAttribute(Object att1, Object att2)
290 if (att1 == null && att2 == null)
296 return att1.equals(att2);
298 return att2.equals(att1);
304 * @return DOCUMENT ME!
307 public int getBegin()
315 * @return DOCUMENT ME!
326 * @return DOCUMENT ME!
328 public String getType()
336 * @return DOCUMENT ME!
338 public String getDescription()
343 public void setDescription(String desc)
348 public String getFeatureGroup()
353 public void addLink(String labelLink)
357 links = new Vector<>();
360 if (!links.contains(labelLink))
362 links.insertElementAt(labelLink, 0);
366 public float getScore()
372 * Used for getting values which are not in the basic set. eg STRAND, PHASE
378 public Object getValue(String key)
380 if (otherDetails == null)
386 return otherDetails.get(key);
391 * Answers the value of the specified attribute as string, or null if no such
392 * value. If more than one attribute name is provided, tries to resolve as keys
393 * to nested maps. For example, if attribute "CSQ" holds a map of key-value
394 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele"
400 public String getValueAsString(String... key)
402 if (otherDetails == null)
406 Object value = otherDetails.get(key[0]);
407 if (key.length > 1 && value instanceof Map<?, ?>)
409 value = ((Map) value).get(key[1]);
411 return value == null ? null : value.toString();
415 * Returns a property value for the given key if known, else the specified
419 * @param defaultValue
422 public Object getValue(String key, Object defaultValue)
424 Object value = getValue(key);
425 return value == null ? defaultValue : value;
429 * Used for setting values which are not in the basic set. eg STRAND, FRAME
437 public void setValue(String key, Object value)
441 if (otherDetails == null)
443 otherDetails = new HashMap<>();
446 otherDetails.put(key, value);
447 recordAttribute(key, value);
452 * Notifies the addition of a feature attribute. This lets us keep track of
453 * which attributes are present on each feature type, and also the range of
454 * numerical-valued attributes.
459 protected void recordAttribute(String key, Object value)
461 String attDesc = null;
464 attDesc = FeatureSources.getInstance().getSource(source)
465 .getAttributeName(key);
468 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
473 * The following methods are added to maintain the castor Uniprot mapping file
476 public void setStatus(String status)
478 setValue(STATUS, status);
481 public String getStatus()
483 return (String) getValue(STATUS);
486 public void setAttributes(String attr)
488 setValue(ATTRIBUTES, attr);
491 public String getAttributes()
493 return (String) getValue(ATTRIBUTES);
497 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
498 * GFF), and 0 for unknown or not (validly) specified
502 public int getStrand()
505 if (otherDetails != null)
507 Object str = otherDetails.get(STRAND);
512 else if ("+".equals(str))
521 * Set the value of strand
524 * should be "+" for forward, or "-" for reverse
526 public void setStrand(String strand)
528 setValue(STRAND, strand);
531 public void setPhase(String phase)
533 setValue(PHASE, phase);
536 public String getPhase()
538 return (String) getValue(PHASE);
542 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
546 public void setEnaLocation(String loc)
548 setValue(LOCATION, loc);
552 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
556 public String getEnaLocation()
558 return (String) getValue(LOCATION);
562 * Readable representation, for debug only, not guaranteed not to change
566 public String toString()
568 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
573 * Overridden to ensure that whenever two objects are equal, they have the
577 public int hashCode()
579 String s = getType() + getDescription() + getFeatureGroup()
580 + getValue("ID") + getValue("Name") + getValue("Parent")
582 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
587 * Answers true if the feature's start/end values represent two related
588 * positions, rather than ends of a range. Such features may be visualised or
589 * reported differently to features on a range.
592 public boolean isContactFeature()
594 return contactFeature;
598 * Answers true if the sequence has zero start and end position
602 public boolean isNonPositional()
604 return begin == 0 && end == 0;
608 * Answers an html-formatted report of feature details
612 public String getDetailsReport()
614 FeatureSourceI metadata = FeatureSources.getInstance()
617 StringBuilder sb = new StringBuilder(128);
619 sb.append("<table>");
620 sb.append(String.format(ROW_DATA, "Type", type, ""));
621 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
622 : begin + (isContactFeature() ? ":" : "-") + end, ""));
623 String desc = StringUtils.stripHtmlTags(description);
624 sb.append(String.format(ROW_DATA, "Description", desc, ""));
625 if (!Float.isNaN(score) && score != 0f)
627 sb.append(String.format(ROW_DATA, "Score", score, ""));
629 if (featureGroup != null)
631 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
634 if (otherDetails != null)
636 TreeMap<String, Object> ordered = new TreeMap<>(
637 String.CASE_INSENSITIVE_ORDER);
638 ordered.putAll(otherDetails);
640 for (Entry<String, Object> entry : ordered.entrySet())
642 String key = entry.getKey();
643 if (ATTRIBUTES.equals(key))
645 continue; // to avoid double reporting
648 Object value = entry.getValue();
649 if (value instanceof Map<?, ?>)
652 * expand values in a Map attribute across separate lines
653 * copy to a TreeMap for alphabetical ordering
655 Map<String, Object> values = (Map<String, Object>) value;
656 SortedMap<String, Object> sm = new TreeMap<>(
657 String.CASE_INSENSITIVE_ORDER);
659 for (Entry<?, ?> e : sm.entrySet())
661 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e
662 .getValue().toString()));
667 // tried <td title="key"> but it failed to provide a tooltip :-(
668 String attDesc = null;
669 if (metadata != null)
671 attDesc = metadata.getAttributeName(key);
673 String s = entry.getValue().toString();
674 if (isValueInteresting(key, s, metadata))
676 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
682 sb.append("</table>");
684 String text = sb.toString();
689 * Answers true if we judge the value is worth displaying, by some heuristic
697 boolean isValueInteresting(String key, String value,
698 FeatureSourceI metadata)
701 * currently suppressing zero values as well as null or empty
703 if (value == null || "".equals(value) || ".".equals(value)
704 || "0".equals(value))
709 if (metadata == null)
714 FeatureAttributeType attType = metadata.getAttributeType(key);
716 && (attType == FeatureAttributeType.Float || attType
717 .equals(FeatureAttributeType.Integer)))
721 float fval = Float.valueOf(value);
726 } catch (NumberFormatException e)
732 return true; // default to interesting
736 * Sets the feature source identifier
740 public void setSource(String theSource)
746 class SFSortByEnd implements Comparator<SequenceFeature>
749 public int compare(SequenceFeature a, SequenceFeature b)
751 return a.getEnd() - b.getEnd();
755 class SFSortByBegin implements Comparator<SequenceFeature>
758 public int compare(SequenceFeature a, SequenceFeature b)
760 return a.getBegin() - b.getBegin();