2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureLocationI;
24 import jalview.util.StringUtils;
26 import java.util.HashMap;
28 import java.util.Map.Entry;
29 import java.util.TreeMap;
30 import java.util.Vector;
33 * A class that models a single contiguous feature on a sequence. If flag
34 * 'contactFeature' is true, the start and end positions are interpreted instead
35 * as two contact points.
37 public class SequenceFeature implements FeatureLocationI
40 * score value if none is set; preferably Float.NaN, but see
41 * JAL-2060 and JAL-2554 for a couple of blockers to that
43 private static final float NO_SCORE = 0f;
45 private static final String STATUS = "status";
47 private static final String STRAND = "STRAND";
49 // private key for Phase designed not to conflict with real GFF data
50 private static final String PHASE = "!Phase";
52 // private key for ENA location designed not to conflict with real GFF data
53 private static final String LOCATION = "!Location";
55 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td></tr>";
58 * map of otherDetails special keys, and their value fields' delimiter
// special otherDetails keys, mapped to the delimiter that separates their values
private static final Map<String, String> INFO_KEYS = new HashMap<>();

static
{
  // values stored under CSQ are split on commas when reported
  INFO_KEYS.put("CSQ", ",");
  // todo capture second level metadata (CSQ FORMAT)
  // and delimiter "|" so as to report in a table within a table?
}
70 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
71 * name1=value1;name2=value2,value3;...etc
73 private static final String ATTRIBUTES = "ATTRIBUTES";
76 * type, begin, end, featureGroup, score and contactFeature are final
77 * to ensure that the integrity of SequenceFeatures data store
78 * can't be broken by direct update of these fields
80 public final String type;
82 public final int begin;
86 public final String featureGroup;
88 public final float score;
90 private final boolean contactFeature;
92 public String description;
95 * a map of key-value pairs; may be populated from GFF 'column 9' data,
96 * other data sources (e.g. GenBank file), or programmatically
98 public Map<String, Object> otherDetails;
100 public Vector<String> links;
103 * Constructs a duplicate feature. Note: makes a shallow copy of the
104 * otherDetails map, so the new and original SequenceFeature may reference the
105 * same objects in the map.
/**
 * Constructs a duplicate of the given feature, with the same type, begin,
 * end, group and score. The otherDetails map is copied shallowly, so both
 * features may reference the same value objects.
 *
 * @param cpy
 *          the feature to copy
 */
public SequenceFeature(SequenceFeature cpy)
{
  // NOTE(review): the last argument was cut off in the damaged listing;
  // getScore() is assumed, matching the
  // (sf, newBegin, newEnd, newGroup, newScore) constructor - confirm
  this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature without an explicit score; the score is set to
 * NO_SCORE.
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
131 * Constructor including a score value
/**
 * Constructs a feature including a score value.
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the begin position
 * @param theEnd
 *          the end position
 * @param theScore
 *          the feature score
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, float theScore, String group)
{
  // NOTE(review): the assignments of 'type' and 'end' were missing from the
  // damaged listing; they are restored because both are final fields read
  // elsewhere in the class - confirm against the real file
  this.type = theType;
  this.description = theDesc;
  this.begin = theBegin;
  this.end = theEnd;
  this.featureGroup = group;
  this.score = theScore;

  /*
   * for now, only "Disulfide/disulphide bond" is treated as a contact feature
   */
  this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
          || "disulphide bond".equalsIgnoreCase(type);
}
158 * A copy constructor that allows the value of final fields to be 'modified'
/**
 * A copy constructor that allows the values of the final fields to be
 * 'modified'. The description is taken from the source feature. The
 * otherDetails map and the links are copied shallowly into new collections.
 *
 * @param sf
 *          the feature to copy
 * @param newType
 *          the type for the new feature
 * @param newBegin
 *          the begin position for the new feature
 * @param newEnd
 *          the end position for the new feature
 * @param newGroup
 *          the feature group for the new feature
 * @param newScore
 *          the score for the new feature
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  // NOTE(review): the last argument was cut off in the damaged listing;
  // newGroup is assumed, as it is the only parameter not otherwise used
  this(newType, sf.getDescription(), newBegin, newEnd, newScore,
          newGroup);

  if (sf.otherDetails != null)
  {
    // new map, same value objects (shallow copy)
    otherDetails = new HashMap<>(sf.otherDetails);
  }
  if (sf.links != null && sf.links.size() > 0)
  {
    // links is left null when the source feature has none
    links = new Vector<>(sf.links);
  }
}
192 * A copy constructor that allows the value of final fields to be 'modified'
/**
 * A copy constructor that keeps the type of the source feature but allows
 * new begin, end, group and score values to be supplied.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          the begin position for the new feature
 * @param newEnd
 *          the end position for the new feature
 * @param newGroup
 *          the feature group for the new feature
 * @param newScore
 *          the score for the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
207 * Two features are considered equal if they have the same type, group,
208 * description, start, end, phase, strand, and (if present) 'Name', 'ID' and
209 * 'Parent' attributes.
211 * Note we need to check Parent to distinguish the same exon occurring in
212 * different transcripts (in Ensembl GFF). This allows assembly of transcript
213 * sequences from their component exon regions.
216 public boolean equals(Object o)
218 return equals(o, false);
222 * Overloaded method allows the equality test to optionally ignore the
223 * 'Parent' attribute of a feature. This supports avoiding adding many
224 * superficially duplicate 'exon' or CDS features to genomic or protein
228 * @param ignoreParent
231 public boolean equals(Object o, boolean ignoreParent)
233 if (o == null || !(o instanceof SequenceFeature))
238 SequenceFeature sf = (SequenceFeature) o;
239 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
241 if (begin != sf.begin || end != sf.end || !sameScore)
246 if (getStrand() != sf.getStrand())
251 if (!(type + description + featureGroup + getPhase()).equals(
252 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
256 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
260 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
266 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
275 * Returns true if both values are null, are both non-null and equal
281 protected static boolean equalAttribute(Object att1, Object att2)
283 if (att1 == null && att2 == null)
289 return att1.equals(att2);
291 return att2.equals(att1);
297 * @return the begin (start) position of this feature
300 public int getBegin()
308 * @return DOCUMENT ME!
319 * @return the type of this feature
321 public String getType()
329 * @return the description of this feature
331 public String getDescription()
336 public void setDescription(String desc)
341 public String getFeatureGroup()
346 public void addLink(String labelLink)
350 links = new Vector<String>();
353 if (!links.contains(labelLink))
355 links.insertElementAt(labelLink, 0);
359 public float getScore()
365 * Used for getting values which are not in the basic set. eg STRAND, PHASE
371 public Object getValue(String key)
373 if (otherDetails == null)
379 return otherDetails.get(key);
384 * Returns a property value for the given key if known, else the specified
388 * @param defaultValue
391 public Object getValue(String key, Object defaultValue)
393 Object value = getValue(key);
394 return value == null ? defaultValue : value;
398 * Used for setting values which are not in the basic set. eg STRAND, FRAME
406 public void setValue(String key, Object value)
410 if (otherDetails == null)
412 otherDetails = new HashMap<String, Object>();
415 otherDetails.put(key, value);
420 * The following methods are added to maintain the castor Uniprot mapping file
423 public void setStatus(String status)
425 setValue(STATUS, status);
428 public String getStatus()
430 return (String) getValue(STATUS);
433 public void setAttributes(String attr)
435 setValue(ATTRIBUTES, attr);
438 public String getAttributes()
440 return (String) getValue(ATTRIBUTES);
444 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
445 * GFF), and 0 for unknown or not (validly) specified
449 public int getStrand()
452 if (otherDetails != null)
454 Object str = otherDetails.get(STRAND);
459 else if ("+".equals(str))
468 * Set the value of strand
471 * should be "+" for forward, or "-" for reverse
473 public void setStrand(String strand)
475 setValue(STRAND, strand);
478 public void setPhase(String phase)
480 setValue(PHASE, phase);
483 public String getPhase()
485 return (String) getValue(PHASE);
489 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
493 public void setEnaLocation(String loc)
495 setValue(LOCATION, loc);
499 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
503 public String getEnaLocation()
505 return (String) getValue(LOCATION);
509 * Readable representation, for debug only, not guaranteed not to change
513 public String toString()
515 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
520 * Overridden to ensure that whenever two objects are equal, they have the
524 public int hashCode()
526 String s = getType() + getDescription() + getFeatureGroup()
527 + getValue("ID") + getValue("Name") + getValue("Parent")
529 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
534 * Answers true if the feature's start/end values represent two related
535 * positions, rather than ends of a range. Such features may be visualised or
536 * reported differently to features on a range.
539 public boolean isContactFeature()
541 return contactFeature;
545 * Answers true if the feature has zero start and end position
549 public boolean isNonPositional()
551 return begin == 0 && end == 0;
555 * Answers an html-formatted report of feature details
559 public String getDetailsReport()
561 StringBuilder sb = new StringBuilder(128);
563 sb.append("<table>");
564 sb.append(String.format(ROW_DATA, "Type", type));
565 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
566 : begin + (isContactFeature() ? ":" : "-") + end));
567 String desc = StringUtils.stripHtmlTags(description);
568 sb.append(String.format(ROW_DATA, "Description", desc));
569 if (!Float.isNaN(score) && score != 0f)
571 sb.append(String.format(ROW_DATA, "Score", score));
573 if (featureGroup != null)
575 sb.append(String.format(ROW_DATA, "Group", featureGroup));
578 if (otherDetails != null)
580 TreeMap<String, Object> ordered = new TreeMap<>(
581 String.CASE_INSENSITIVE_ORDER);
582 ordered.putAll(otherDetails);
584 for (Entry<String, Object> entry : ordered.entrySet())
586 String key = entry.getKey();
587 if (ATTRIBUTES.equals(key))
589 continue; // to avoid double reporting
591 if (INFO_KEYS.containsKey(key))
594 * split selected INFO data by delimiter over multiple lines
596 String delimiter = INFO_KEYS.get(key);
597 String[] values = entry.getValue().toString().split(delimiter);
598 for (String value : values)
600 sb.append("<tr><td>").append(key).append("</td><td>")
602 .append("</td></tr>");
606 { // tried <td title="key"> but it failed to provide a tooltip :-(
607 sb.append("<tr><td>").append(key).append("</td><td>");
608 sb.append(entry.getValue().toString()).append("</td></tr>");
612 sb.append("</table>");
614 String text = sb.toString();