/*
 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
 * Copyright (C) $$Year-Rel$$ The Jalview Authors
 * 
 * This file is part of Jalview.
 * 
 * Jalview is free software: you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 * 
 * Jalview is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
 * The Jalview Authors are detailed in the 'AUTHORS' file.
 */
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureAttributeType;
24 import jalview.datamodel.features.FeatureLocationI;
25 import jalview.datamodel.features.FeatureSourceI;
26 import jalview.datamodel.features.FeatureSources;
27 import jalview.util.StringUtils;
29 import java.util.HashMap;
31 import java.util.Map.Entry;
32 import java.util.TreeMap;
33 import java.util.Vector;
/**
 * A class that models a single contiguous feature on a sequence. If flag
 * 'contactFeature' is true, the start and end positions are interpreted instead
 * as two contact points.
 */
40 public class SequenceFeature implements FeatureLocationI
/*
 * score value if none is set; preferably Float.NaN, but see
 * JAL-2060 and JAL-2554 for a couple of blockers to that
 */
private static final float NO_SCORE = 0f;

// otherDetails key used by setStatus / getStatus
private static final String STATUS = "status";

// otherDetails key used by setStrand / getStrand
private static final String STRAND = "STRAND";

// private key for Phase designed not to conflict with real GFF data
private static final String PHASE = "!Phase";

// private key for ENA location designed not to conflict with real GFF data
private static final String LOCATION = "!Location";

// String.format template for one three-cell html table row
// ('%%' is a literal percent sign, '%s' a cell value)
private static final String ROW_DATA = "<tr><td width=\"10%%\">%s</td><td width=\"50%%\">%s</td><td width=\"40%%\">%s</td></tr>";

/*
 * map of otherDetails special keys, and their value fields' delimiter
 */
private static final Map<String, String> INFO_KEYS = new HashMap<>();

static
{
  INFO_KEYS.put("CSQ", ",");
  // todo capture second level metadata (CSQ FORMAT)
  // and delimiter "|" so as to report in a table within a table?
}

/*
 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
 * name1=value1;name2=value2,value3;...etc
 */
private static final String ATTRIBUTES = "ATTRIBUTES";
79 * type, begin, end, featureGroup, score and contactFeature are final
80 * to ensure that the integrity of SequenceFeatures data store
81 * can't be broken by direct update of these fields
83 public final String type;
85 public final int begin;
89 public final String featureGroup;
91 public final float score;
93 private final boolean contactFeature;
95 public String description;
98 * a map of key-value pairs; may be populated from GFF 'column 9' data,
99 * other data sources (e.g. GenBank file), or programmatically
101 public Map<String, Object> otherDetails;
103 public Vector<String> links;
106 * the identifier (if known) for the FeatureSource held in FeatureSources,
107 * as a provider of metadata about feature attributes
109 private String source;
/**
 * Constructs a duplicate feature. Note: makes a shallow copy of the
 * otherDetails map, so the new and original SequenceFeature may reference the
 * same objects in the map.
 * 
 * @param cpy
 *          the feature to duplicate
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature without an explicit score; the score is set to
 * NO_SCORE.
 * 
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
/**
 * Constructor including a score value
 * 
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param theScore
 *          the feature score
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, float theScore, String group)
{
  this.type = theType;
  this.description = theDesc;
  this.begin = theBegin;
  this.end = theEnd;
  this.featureGroup = group;
  this.score = theScore;

  /*
   * for now, only "Disulfide/disulphide bond" is treated as a contact feature
   * (the literal is the receiver, so a null type is simply not a contact)
   */
  this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
          || "disulphide bond".equalsIgnoreCase(type);
}
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The description is taken from the copied feature. The otherDetails map and
 * the links list are copied shallowly: the new feature gets its own
 * collections, but they hold the same value objects.
 * 
 * @param sf
 *          the feature to copy
 * @param newType
 *          the type of the new feature
 * @param newBegin
 *          the begin position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the feature group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  this(newType, sf.getDescription(), newBegin, newEnd, newScore,
          newGroup);

  if (sf.otherDetails != null)
  {
    otherDetails = new HashMap<>(sf.otherDetails);
  }

  // an empty links list is not copied, leaving links as null
  if (sf.links != null && !sf.links.isEmpty())
  {
    links = new Vector<>(sf.links);
  }
}
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * keeping the type of the copied feature.
 * 
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          the begin position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the feature group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
216 * Two features are considered equal if they have the same type, group,
217 * description, start, end, phase, strand, and (if present) 'Name', ID' and
218 * 'Parent' attributes.
220 * Note we need to check Parent to distinguish the same exon occurring in
221 * different transcripts (in Ensembl GFF). This allows assembly of transcript
222 * sequences from their component exon regions.
225 public boolean equals(Object o)
227 return equals(o, false);
231 * Overloaded method allows the equality test to optionally ignore the
232 * 'Parent' attribute of a feature. This supports avoiding adding many
233 * superficially duplicate 'exon' or CDS features to genomic or protein
237 * @param ignoreParent
240 public boolean equals(Object o, boolean ignoreParent)
242 if (o == null || !(o instanceof SequenceFeature))
247 SequenceFeature sf = (SequenceFeature) o;
248 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
250 if (begin != sf.begin || end != sf.end || !sameScore)
255 if (getStrand() != sf.getStrand())
260 if (!(type + description + featureGroup + getPhase()).equals(
261 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
265 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
269 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
275 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
284 * Returns true if both values are null, are both non-null and equal
290 protected static boolean equalAttribute(Object att1, Object att2)
292 if (att1 == null && att2 == null)
298 return att1.equals(att2);
300 return att2.equals(att1);
306 * @return DOCUMENT ME!
309 public int getBegin()
317 * @return DOCUMENT ME!
328 * @return DOCUMENT ME!
330 public String getType()
338 * @return DOCUMENT ME!
340 public String getDescription()
345 public void setDescription(String desc)
350 public String getFeatureGroup()
355 public void addLink(String labelLink)
359 links = new Vector<String>();
362 if (!links.contains(labelLink))
364 links.insertElementAt(labelLink, 0);
368 public float getScore()
374 * Used for getting values which are not in the basic set. eg STRAND, PHASE
380 public Object getValue(String key)
382 if (otherDetails == null)
388 return otherDetails.get(key);
393 * Returns a property value for the given key if known, else the specified
397 * @param defaultValue
400 public Object getValue(String key, Object defaultValue)
402 Object value = getValue(key);
403 return value == null ? defaultValue : value;
407 * Used for setting values which are not in the basic set. eg STRAND, FRAME
415 public void setValue(String key, Object value)
419 if (otherDetails == null)
421 otherDetails = new HashMap<String, Object>();
424 otherDetails.put(key, value);
429 * The following methods are added to maintain the castor Uniprot mapping file
432 public void setStatus(String status)
434 setValue(STATUS, status);
437 public String getStatus()
439 return (String) getValue(STATUS);
442 public void setAttributes(String attr)
444 setValue(ATTRIBUTES, attr);
447 public String getAttributes()
449 return (String) getValue(ATTRIBUTES);
453 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
454 * GFF), and 0 for unknown or not (validly) specified
458 public int getStrand()
461 if (otherDetails != null)
463 Object str = otherDetails.get(STRAND);
468 else if ("+".equals(str))
477 * Set the value of strand
480 * should be "+" for forward, or "-" for reverse
482 public void setStrand(String strand)
484 setValue(STRAND, strand);
487 public void setPhase(String phase)
489 setValue(PHASE, phase);
492 public String getPhase()
494 return (String) getValue(PHASE);
498 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
502 public void setEnaLocation(String loc)
504 setValue(LOCATION, loc);
508 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
512 public String getEnaLocation()
514 return (String) getValue(LOCATION);
518 * Readable representation, for debug only, not guaranteed not to change
522 public String toString()
524 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
529 * Overridden to ensure that whenever two objects are equal, they have the
533 public int hashCode()
535 String s = getType() + getDescription() + getFeatureGroup()
536 + getValue("ID") + getValue("Name") + getValue("Parent")
538 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
543 * Answers true if the feature's start/end values represent two related
544 * positions, rather than ends of a range. Such features may be visualised or
545 * reported differently to features on a range.
548 public boolean isContactFeature()
550 return contactFeature;
554 * Answers true if the sequence has zero start and end position
558 public boolean isNonPositional()
560 return begin == 0 && end == 0;
564 * Answers an html-formatted report of feature details
568 public String getDetailsReport()
570 FeatureSourceI metadata = FeatureSources.getInstance()
573 StringBuilder sb = new StringBuilder(128);
575 sb.append("<table>");
576 sb.append(String.format(ROW_DATA, "Type", type, ""));
577 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
578 : begin + (isContactFeature() ? ":" : "-") + end, ""));
579 String desc = StringUtils.stripHtmlTags(description);
580 sb.append(String.format(ROW_DATA, "Description", desc, ""));
581 if (!Float.isNaN(score) && score != 0f)
583 sb.append(String.format(ROW_DATA, "Score", score, ""));
585 if (featureGroup != null)
587 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
590 if (otherDetails != null)
592 TreeMap<String, Object> ordered = new TreeMap<>(
593 String.CASE_INSENSITIVE_ORDER);
594 ordered.putAll(otherDetails);
596 for (Entry<String, Object> entry : ordered.entrySet())
598 String key = entry.getKey();
599 if (ATTRIBUTES.equals(key))
601 continue; // to avoid double reporting
603 if (INFO_KEYS.containsKey(key))
606 * split selected INFO data by delimiter over multiple lines
608 String delimiter = INFO_KEYS.get(key);
609 String[] values = entry.getValue().toString().split(delimiter);
610 for (String value : values)
612 sb.append(String.format(ROW_DATA, key, "", value));
616 { // tried <td title="key"> but it failed to provide a tooltip :-(
617 String attDesc = null;
618 if (metadata != null)
620 attDesc = metadata.getAttributeName(key);
622 String value = entry.getValue().toString();
623 if (isValueInteresting(key, value, metadata))
625 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
631 sb.append("</table>");
633 String text = sb.toString();
638 * Answers true if we judge the value is worth displaying, by some heuristic
646 boolean isValueInteresting(String key, String value,
647 FeatureSourceI metadata)
650 * currently suppressing zero values as well as null or empty
652 if (value == null || "".equals(value) || ".".equals(value)
653 || "0".equals(value))
658 if (metadata == null)
663 FeatureAttributeType attributeType = metadata.getAttributeType(key);
664 if (attributeType == FeatureAttributeType.Float
665 || attributeType.equals(FeatureAttributeType.Integer))
669 float fval = Float.valueOf(value);
674 } catch (NumberFormatException e)
680 return true; // default to interesting
684 * Sets the feature source identifier
688 public void setSource(String theSource)