2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureLocationI;
25 import jalview.datamodel.features.FeatureSourceI;
26 import jalview.datamodel.features.FeatureSources;
27 import jalview.util.StringUtils;
29 import java.util.HashMap;
31 import java.util.Map.Entry;
32 import java.util.TreeMap;
33 import java.util.Vector;
36 * A class that models a single contiguous feature on a sequence. If flag
37 * 'contactFeature' is true, the start and end positions are interpreted instead
38 * as two contact points.
40 public class SequenceFeature implements FeatureLocationI
43 * score value if none is set; preferably Float.NaN, but see
44 * JAL-2060 and JAL-2554 for a couple of blockers to that
46 private static final float NO_SCORE = 0f;
48 private static final String STATUS = "status";
50 private static final String STRAND = "STRAND";
52 // private key for Phase designed not to conflict with real GFF data
53 private static final String PHASE = "!Phase";
55 // private key for ENA location designed not to conflict with real GFF data
56 private static final String LOCATION = "!Location";
58 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
61 * map of otherDetails special keys, and their value fields' delimiter
63 private static final Map<String, String> INFO_KEYS = new HashMap<>();
67 INFO_KEYS.put("CSQ", ",");
68 // todo capture second level metadata (CSQ FORMAT)
69 // and delimiter "|" so as to report in a table within a table?
73 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
74 * name1=value1;name2=value2,value3;...etc
76 private static final String ATTRIBUTES = "ATTRIBUTES";
79 * type, begin, end, featureGroup, score and contactFeature are final
80 * to ensure that the integrity of SequenceFeatures data store
81 * can't be broken by direct update of these fields
83 public final String type;
85 public final int begin;
89 public final String featureGroup;
91 public final float score;
93 private final boolean contactFeature;
95 public String description;
98 * a map of key-value pairs; may be populated from GFF 'column 9' data,
99 * other data sources (e.g. GenBank file), or programmatically
101 public Map<String, Object> otherDetails;
103 public Vector<String> links;
106 * the identifier (if known) for the FeatureSource held in FeatureSources,
107 * as a provider of metadata about feature attributes
109 private String source;
/**
 * Constructs a duplicate of the given feature, keeping its begin, end,
 * feature group and score. Note: this makes a shallow copy of the
 * otherDetails map, so the new and original SequenceFeature may reference the
 * same objects in the map.
 *
 * @param cpy
 *          the feature to copy
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature without an explicit score; the score is set to
 * NO_SCORE.
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
140 * Constructor including a score value
149 public SequenceFeature(String theType, String theDesc, int theBegin,
150 int theEnd, float theScore, String group)
153 this.description = theDesc;
154 this.begin = theBegin;
156 this.featureGroup = group;
157 this.score = theScore;
160 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
162 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
163 || "disulphide bond".equalsIgnoreCase(type);
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The description and source are taken from the given feature; its
 * otherDetails map (if any) and links (if any) are copied into new
 * collections holding the same entries.
 *
 * @param sf
 *          the feature to copy
 * @param newType
 *          the type of the new feature
 * @param newBegin
 *          the begin position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the feature group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  this(newType, sf.getDescription(), newBegin, newEnd, newScore, newGroup);

  this.source = sf.source;

  if (sf.otherDetails != null)
  {
    // shallow copy: keys and values are shared with the original
    otherDetails = new HashMap<>(sf.otherDetails);
  }

  // an empty links list on the original leaves links unset on the copy
  if (sf.links != null && !sf.links.isEmpty())
  {
    links = new Vector<>(sf.links);
  }
}
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * keeping the type of the copied feature.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          the begin position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the feature group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
218 * Two features are considered equal if they have the same type, group,
219 * description, start, end, phase, strand, and (if present) 'Name', ID' and
220 * 'Parent' attributes.
222 * Note we need to check Parent to distinguish the same exon occurring in
223 * different transcripts (in Ensembl GFF). This allows assembly of transcript
224 * sequences from their component exon regions.
227 public boolean equals(Object o)
229 return equals(o, false);
233 * Overloaded method allows the equality test to optionally ignore the
234 * 'Parent' attribute of a feature. This supports avoiding adding many
235 * superficially duplicate 'exon' or CDS features to genomic or protein
239 * @param ignoreParent
242 public boolean equals(Object o, boolean ignoreParent)
244 if (o == null || !(o instanceof SequenceFeature))
249 SequenceFeature sf = (SequenceFeature) o;
250 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
252 if (begin != sf.begin || end != sf.end || !sameScore)
257 if (getStrand() != sf.getStrand())
262 if (!(type + description + featureGroup + getPhase()).equals(
263 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
267 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
271 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
277 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
286 * Returns true if both values are null, are both non-null and equal
292 protected static boolean equalAttribute(Object att1, Object att2)
294 if (att1 == null && att2 == null)
300 return att1.equals(att2);
302 return att2.equals(att1);
308 * @return DOCUMENT ME!
311 public int getBegin()
319 * @return DOCUMENT ME!
330 * @return DOCUMENT ME!
332 public String getType()
340 * @return DOCUMENT ME!
342 public String getDescription()
347 public void setDescription(String desc)
352 public String getFeatureGroup()
357 public void addLink(String labelLink)
361 links = new Vector<String>();
364 if (!links.contains(labelLink))
366 links.insertElementAt(labelLink, 0);
370 public float getScore()
376 * Used for getting values which are not in the basic set. eg STRAND, PHASE
382 public Object getValue(String key)
384 if (otherDetails == null)
390 return otherDetails.get(key);
395 * Returns a property value for the given key if known, else the specified
399 * @param defaultValue
402 public Object getValue(String key, Object defaultValue)
404 Object value = getValue(key);
405 return value == null ? defaultValue : value;
409 * Used for setting values which are not in the basic set, e.g. STRAND, FRAME
417 public void setValue(String key, Object value)
421 if (otherDetails == null)
423 otherDetails = new HashMap<String, Object>();
426 otherDetails.put(key, value);
431 * The following methods are added to maintain the castor Uniprot mapping file
434 public void setStatus(String status)
436 setValue(STATUS, status);
439 public String getStatus()
441 return (String) getValue(STATUS);
444 public void setAttributes(String attr)
446 setValue(ATTRIBUTES, attr);
449 public String getAttributes()
451 return (String) getValue(ATTRIBUTES);
455 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
456 * GFF), and 0 for unknown or not (validly) specified
460 public int getStrand()
463 if (otherDetails != null)
465 Object str = otherDetails.get(STRAND);
470 else if ("+".equals(str))
479 * Set the value of strand
482 * should be "+" for forward, or "-" for reverse
484 public void setStrand(String strand)
486 setValue(STRAND, strand);
489 public void setPhase(String phase)
491 setValue(PHASE, phase);
494 public String getPhase()
496 return (String) getValue(PHASE);
500 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
504 public void setEnaLocation(String loc)
506 setValue(LOCATION, loc);
510 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
514 public String getEnaLocation()
516 return (String) getValue(LOCATION);
520 * Readable representation, for debug only, not guaranteed not to change
524 public String toString()
526 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
531 * Overridden to ensure that whenever two objects are equal, they have the
535 public int hashCode()
537 String s = getType() + getDescription() + getFeatureGroup()
538 + getValue("ID") + getValue("Name") + getValue("Parent")
540 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
545 * Answers true if the feature's start/end values represent two related
546 * positions, rather than ends of a range. Such features may be visualised or
547 * reported differently to features on a range.
550 public boolean isContactFeature()
552 return contactFeature;
556 * Answers true if the sequence has zero start and end position
560 public boolean isNonPositional()
562 return begin == 0 && end == 0;
566 * Answers an html-formatted report of feature details
570 public String getDetailsReport()
572 FeatureSourceI metadata = FeatureSources.getInstance()
575 StringBuilder sb = new StringBuilder(128);
577 sb.append("<table>");
578 sb.append(String.format(ROW_DATA, "Type", type, ""));
579 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
580 : begin + (isContactFeature() ? ":" : "-") + end, ""));
581 String desc = StringUtils.stripHtmlTags(description);
582 sb.append(String.format(ROW_DATA, "Description", desc, ""));
583 if (!Float.isNaN(score) && score != 0f)
585 sb.append(String.format(ROW_DATA, "Score", score, ""));
587 if (featureGroup != null)
589 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
592 if (otherDetails != null)
594 TreeMap<String, Object> ordered = new TreeMap<>(
595 String.CASE_INSENSITIVE_ORDER);
596 ordered.putAll(otherDetails);
598 for (Entry<String, Object> entry : ordered.entrySet())
600 String key = entry.getKey();
601 if (ATTRIBUTES.equals(key))
603 continue; // to avoid double reporting
605 if (INFO_KEYS.containsKey(key))
608 * split selected INFO data by delimiter over multiple lines
610 String delimiter = INFO_KEYS.get(key);
611 String[] values = entry.getValue().toString().split(delimiter);
612 for (String value : values)
614 sb.append(String.format(ROW_DATA, key, "", value));
618 { // tried <td title="key"> but it failed to provide a tooltip :-(
619 String attDesc = null;
620 if (metadata != null)
622 attDesc = metadata.getAttributeName(key);
624 String value = entry.getValue().toString();
625 if (isValueInteresting(key, value, metadata))
627 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
633 sb.append("</table>");
635 String text = sb.toString();
640 * Answers true if we judge the value is worth displaying, by some heuristic
648 boolean isValueInteresting(String key, String value,
649 FeatureSourceI metadata)
652 * currently suppressing zero values as well as null or empty
654 if (value == null || "".equals(value) || ".".equals(value)
655 || "0".equals(value))
660 if (metadata == null)
665 FeatureAttributeType attType = metadata.getAttributeType(key);
667 && (attType == FeatureAttributeType.Float || attType
668 .equals(FeatureAttributeType.Integer)))
672 float fval = Float.valueOf(value);
677 } catch (NumberFormatException e)
683 return true; // default to interesting
687 * Sets the feature source identifier
691 public void setSource(String theSource)