2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import jalview.datamodel.features.FeatureLocationI;
24 import jalview.util.StringUtils;
26 import java.util.HashMap;
28 import java.util.Map.Entry;
29 import java.util.TreeMap;
30 import java.util.Vector;
33 * A class that models a single contiguous feature on a sequence. If flag
34 * 'contactFeature' is true, the start and end positions are interpreted instead
35 * as two contact points.
37 public class SequenceFeature implements FeatureLocationI
40 * score value if none is set; preferably Float.NaN, but see
41 * JAL-2060 and JAL-2554 for a couple of blockers to that
43 private static final float NO_SCORE = 0f;
45 private static final String STATUS = "status";
47 private static final String STRAND = "STRAND";
49 // private key for Phase designed not to conflict with real GFF data
50 private static final String PHASE = "!Phase";
52 // private key for ENA location designed not to conflict with real GFF data
53 private static final String LOCATION = "!Location";
55 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td></tr>";
58 * map of otherDetails special keys, and their value fields' delimiter
60 private static final Map<String, String> INFO_KEYS = new HashMap<>();
64 INFO_KEYS.put("CSQ", ",");
68 * ATTRIBUTES is reserved for the GFF 'column 9' data, formatted as
69 * name1=value1;name2=value2,value3;...etc
71 private static final String ATTRIBUTES = "ATTRIBUTES";
74 * type, begin, end, featureGroup, score and contactFeature are final
75 * to ensure that the integrity of SequenceFeatures data store
76 * can't be broken by direct update of these fields
78 public final String type;
80 public final int begin;
84 public final String featureGroup;
86 public final float score;
88 private final boolean contactFeature;
90 public String description;
93 * a map of key-value pairs; may be populated from GFF 'column 9' data,
94 * other data sources (e.g. GenBank file), or programmatically
96 public Map<String, Object> otherDetails;
98 public Vector<String> links;
101 * Constructs a duplicate feature. Note: this makes a shallow copy of the
102 * otherDetails map, so the new and original SequenceFeature may reference the
103 * same objects in the map.
107 public SequenceFeature(SequenceFeature cpy)
109 this(cpy, cpy.getBegin(), cpy.getEnd(), cpy.getFeatureGroup(), cpy
/**
 * Constructs a feature for which no score is supplied. Delegates to the
 * constructor that takes a score, passing the NO_SCORE placeholder value.
 *
 * @param theType
 * @param theDesc
 * @param theBegin
 * @param theEnd
 * @param group
 */
public SequenceFeature(String theType, String theDesc,
        int theBegin, int theEnd,
        String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
129 * Constructor including a score value
138 public SequenceFeature(String theType, String theDesc, int theBegin,
139 int theEnd, float theScore, String group)
142 this.description = theDesc;
143 this.begin = theBegin;
145 this.featureGroup = group;
146 this.score = theScore;
149 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
151 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
152 || "disulphide bond".equalsIgnoreCase(type);
156 * A copy constructor that allows the value of final fields to be 'modified'
165 public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
166 int newEnd, String newGroup, float newScore)
168 this(newType, sf.getDescription(), newBegin, newEnd, newScore,
171 if (sf.otherDetails != null)
173 otherDetails = new HashMap<String, Object>();
174 for (Entry<String, Object> entry : sf.otherDetails.entrySet())
176 otherDetails.put(entry.getKey(), entry.getValue());
179 if (sf.links != null && sf.links.size() > 0)
181 links = new Vector<String>();
182 for (int i = 0, iSize = sf.links.size(); i < iSize; i++)
184 links.addElement(sf.links.elementAt(i));
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The type of the new feature is taken from the feature being copied; begin,
 * end, group and score are the values supplied here. The work is delegated to
 * the copy constructor that also accepts a new type.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 * @param newEnd
 * @param newGroup
 * @param newScore
 */
public SequenceFeature(SequenceFeature sf,
        int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
205 * Two features are considered equal if they have the same type, group,
206 * description, start, end, phase, strand, and (if present) 'Name', ID' and
207 * 'Parent' attributes.
209 * Note we need to check Parent to distinguish the same exon occurring in
210 * different transcripts (in Ensembl GFF). This allows assembly of transcript
211 * sequences from their component exon regions.
214 public boolean equals(Object o)
216 return equals(o, false);
220 * Overloaded method allows the equality test to optionally ignore the
221 * 'Parent' attribute of a feature. This supports avoiding adding many
222 * superficially duplicate 'exon' or CDS features to genomic or protein
226 * @param ignoreParent
229 public boolean equals(Object o, boolean ignoreParent)
231 if (o == null || !(o instanceof SequenceFeature))
236 SequenceFeature sf = (SequenceFeature) o;
237 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
239 if (begin != sf.begin || end != sf.end || !sameScore)
244 if (getStrand() != sf.getStrand())
249 if (!(type + description + featureGroup + getPhase()).equals(
250 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
254 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
258 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
264 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
273 * Returns true if both values are null, or are both non-null and equal
279 protected static boolean equalAttribute(Object att1, Object att2)
281 if (att1 == null && att2 == null)
287 return att1.equals(att2);
289 return att2.equals(att1);
295 * @return DOCUMENT ME!
298 public int getBegin()
306 * @return DOCUMENT ME!
317 * @return DOCUMENT ME!
319 public String getType()
327 * @return DOCUMENT ME!
329 public String getDescription()
334 public void setDescription(String desc)
339 public String getFeatureGroup()
344 public void addLink(String labelLink)
348 links = new Vector<String>();
351 if (!links.contains(labelLink))
353 links.insertElementAt(labelLink, 0);
357 public float getScore()
363 * Used for getting values which are not in the basic set, e.g. STRAND, PHASE
369 public Object getValue(String key)
371 if (otherDetails == null)
377 return otherDetails.get(key);
382 * Returns a property value for the given key if known, else the specified
386 * @param defaultValue
389 public Object getValue(String key, Object defaultValue)
391 Object value = getValue(key);
392 return value == null ? defaultValue : value;
396 * Used for setting values which are not in the basic set, e.g. STRAND, FRAME
404 public void setValue(String key, Object value)
408 if (otherDetails == null)
410 otherDetails = new HashMap<String, Object>();
413 otherDetails.put(key, value);
418 * The following methods are added to maintain the castor Uniprot mapping file
421 public void setStatus(String status)
423 setValue(STATUS, status);
426 public String getStatus()
428 return (String) getValue(STATUS);
431 public void setAttributes(String attr)
433 setValue(ATTRIBUTES, attr);
436 public String getAttributes()
438 return (String) getValue(ATTRIBUTES);
442 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
443 * GFF), and 0 for unknown or not (validly) specified
447 public int getStrand()
450 if (otherDetails != null)
452 Object str = otherDetails.get(STRAND);
457 else if ("+".equals(str))
466 * Set the value of strand
469 * should be "+" for forward, or "-" for reverse
471 public void setStrand(String strand)
473 setValue(STRAND, strand);
476 public void setPhase(String phase)
478 setValue(PHASE, phase);
481 public String getPhase()
483 return (String) getValue(PHASE);
487 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
491 public void setEnaLocation(String loc)
493 setValue(LOCATION, loc);
497 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
501 public String getEnaLocation()
503 return (String) getValue(LOCATION);
507 * Readable representation, for debug only, not guaranteed not to change
511 public String toString()
513 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
518 * Overridden to ensure that whenever two objects are equal, they have the
522 public int hashCode()
524 String s = getType() + getDescription() + getFeatureGroup()
525 + getValue("ID") + getValue("Name") + getValue("Parent")
527 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
532 * Answers true if the feature's start/end values represent two related
533 * positions, rather than ends of a range. Such features may be visualised or
534 * reported differently to features on a range.
537 public boolean isContactFeature()
539 return contactFeature;
543 * Answers true if the sequence has zero start and end position
547 public boolean isNonPositional()
549 return begin == 0 && end == 0;
553 * Answers an html-formatted report of feature details
557 public String getDetailsReport()
559 StringBuilder sb = new StringBuilder(128);
561 sb.append("<table>");
562 sb.append(String.format(ROW_DATA, "Type", type));
563 sb.append(String.format(ROW_DATA, "Start/end", begin == end ? begin
564 : begin + (isContactFeature() ? ":" : "-") + end));
565 String desc = StringUtils.stripHtmlTags(description);
566 sb.append(String.format(ROW_DATA, "Description", desc));
567 if (!Float.isNaN(score) && score != 0f)
569 sb.append(String.format(ROW_DATA, "Score", score));
571 if (featureGroup != null)
573 sb.append(String.format(ROW_DATA, "Group", featureGroup));
576 if (otherDetails != null)
578 TreeMap<String, Object> ordered = new TreeMap<>(
579 String.CASE_INSENSITIVE_ORDER);
580 ordered.putAll(otherDetails);
582 for (Entry<String, Object> entry : ordered.entrySet())
584 String key = entry.getKey();
585 if (ATTRIBUTES.equals(key))
587 continue; // to avoid double reporting
589 sb.append("<tr><td>").append(key).append("</td><td>");
590 if (INFO_KEYS.containsKey(key))
593 * split selected INFO data by delimiter over multiple lines
595 sb.append("</td></tr>");
596 String delimiter = INFO_KEYS.get(key);
597 String[] values = entry.getValue().toString().split(delimiter);
598 for (String value : values)
600 sb.append("<tr><td> </td><td>").append(value)
601 .append("</td></tr>");
606 sb.append(entry.getValue().toString()).append("</td></tr>");
610 sb.append("</table>");
612 String text = sb.toString();