2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import java.util.Comparator;
24 import java.util.LinkedHashMap;
26 import java.util.Map.Entry;
27 import java.util.SortedMap;
28 import java.util.TreeMap;
29 import java.util.Vector;
31 import jalview.datamodel.features.FeatureAttributeType;
32 import jalview.datamodel.features.FeatureAttributes;
33 import jalview.datamodel.features.FeatureLocationI;
34 import jalview.datamodel.features.FeatureSourceI;
35 import jalview.datamodel.features.FeatureSources;
36 import jalview.util.StringUtils;
/**
 * A class that models a single contiguous feature on a sequence. If flag
 * 'contactFeature' is true, the start and end positions are interpreted instead
 * as two contact points.
 */
public class SequenceFeature implements FeatureLocationI
/*
 * score value if none is set; preferably Float.NaN, but see
 * JAL-2060 and JAL-2554 for a couple of blockers to that
 */
private static final float NO_SCORE = 0f;

// attribute key under which setStatus / getStatus store their value
private static final String STATUS = "status";

// attribute key under which setStrand stores its value; read by getStrand
public static final String STRAND = "STRAND";

// key for Phase designed not to conflict with real GFF data
public static final String PHASE = "!Phase";

// private key for ENA location designed not to conflict with real GFF data
private static final String LOCATION = "!Location";

// format of one three-column html table row, used by getDetailsReport
private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";

/*
 * type, begin, end, featureGroup, score and contactFeature are final
 * to ensure that the integrity of SequenceFeatures data store
 * can't be broken by direct update of these fields
 */
public final String type;

public final int begin;

// NOTE(review): the declaration of 'end' (named in the comment above and read
// by equals and isNonPositional) is not visible in the reviewed extract, nor
// is the opening brace of the class body - confirm both are present here
public final String featureGroup;

public final float score;

// set once in the constructor from the feature type; see isContactFeature
private final boolean contactFeature;

public String description;

/*
 * a map of key-value pairs; may be populated from GFF 'column 9' data,
 * other data sources (e.g. GenBank file), or programmatically
 */
public Map<String, Object> otherDetails;

// hyperlinks added via addLink; null until the first link is added
public Vector<String> links;

/*
 * the identifier (if known) for the FeatureSource held in FeatureSources,
 * as a provider of metadata about feature attributes
 */
private String source;
/**
 * Constructs a duplicate of the given feature, keeping its type, range,
 * group and score. Note: this makes a shallow copy of the otherDetails map,
 * so the new and the original SequenceFeature may reference the same objects
 * in the map.
 *
 * @param cpy
 *          the feature to copy
 */
public SequenceFeature(SequenceFeature cpy)
{
  // NOTE(review): the final argument is not visible in the reviewed extract;
  // it is taken to be the score of the copied feature
  this(cpy, cpy.getType(), cpy.getBegin(), cpy.getEnd(),
          cpy.getFeatureGroup(), cpy.getScore());
}
/**
 * Constructs a feature with no score; the score is set to the NO_SCORE
 * default.
 *
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the start position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  this(theType, theDesc, theBegin, theEnd, NO_SCORE, group);
}
125 * Constructor including a score value
134 public SequenceFeature(String theType, String theDesc, int theBegin,
135 int theEnd, float theScore, String group)
138 this.description = theDesc;
139 this.begin = theBegin;
141 this.featureGroup = group;
142 this.score = theScore;
145 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
147 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
148 || "disulphide bond".equalsIgnoreCase(type);
/**
 * A copy constructor that allows the value of final fields to be 'modified'.
 * The description and source are taken from the copied feature; its
 * attributes map and links (if any) are copied into new collections.
 *
 * @param sf
 *          the feature to copy
 * @param newType
 *          the type of the new feature
 * @param newBegin
 *          the start position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
        int newEnd, String newGroup, float newScore)
{
  this(newType, sf.getDescription(), newBegin, newEnd, newScore, newGroup);

  this.source = sf.source;

  if (sf.otherDetails != null)
  {
    // shallow copy: the map is new, the values are shared
    otherDetails = new LinkedHashMap<>(sf.otherDetails);
  }
  if (sf.links != null && !sf.links.isEmpty())
  {
    links = new Vector<>(sf.links);
  }
}
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * keeping the type of the copied feature.
 *
 * @param sf
 *          the feature to copy
 * @param newBegin
 *          the start position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf, sf.getType(), newBegin, newEnd, newGroup, newScore);
}
197 * Two features are considered equal if they have the same type, group,
198 * description, start, end, phase, strand, and (if present) 'Name', ID' and
199 * 'Parent' attributes.
201 * Note we need to check Parent to distinguish the same exon occurring in
202 * different transcripts (in Ensembl GFF). This allows assembly of transcript
203 * sequences from their component exon regions.
206 public boolean equals(Object o)
208 return equals(o, false);
212 * Overloaded method allows the equality test to optionally ignore the
213 * 'Parent' attribute of a feature. This supports avoiding adding many
214 * superficially duplicate 'exon' or CDS features to genomic or protein
218 * @param ignoreParent
221 public boolean equals(Object o, boolean ignoreParent)
223 if (o == null || !(o instanceof SequenceFeature))
228 SequenceFeature sf = (SequenceFeature) o;
229 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
231 if (begin != sf.begin || end != sf.end || !sameScore)
236 if (getStrand() != sf.getStrand())
241 if (!(type + description + featureGroup + getPhase()).equals(
242 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
246 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
250 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
256 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
265 * Returns true if both values are null, are both non-null and equal
271 protected static boolean equalAttribute(Object att1, Object att2)
273 if (att1 == null && att2 == null)
279 return att1.equals(att2);
281 return att2.equals(att1);
287 * @return DOCUMENT ME!
290 public int getBegin()
298 * @return DOCUMENT ME!
309 * @return DOCUMENT ME!
311 public String getType()
319 * @return DOCUMENT ME!
321 public String getDescription()
326 public void setDescription(String desc)
331 public String getFeatureGroup()
337 * Adds a hyperlink for the feature. This should have the format label|url.
341 public void addLink(String labelLink)
345 links = new Vector<>();
348 if (!links.contains(labelLink))
350 links.insertElementAt(labelLink, 0);
354 public float getScore()
360 * Used for getting values which are not in the basic set. eg STRAND, PHASE
366 public Object getValue(String key)
368 if (otherDetails == null)
374 return otherDetails.get(key);
379 * Answers the value of the specified attribute as string, or null if no such
380 * value. If more than one attribute name is provided, tries to resolve as
381 * keys to nested maps. For example, if attribute "CSQ" holds a map of
382 * key-value pairs, then getValueAsString("CSQ", "Allele") returns the value
383 * of "Allele" in that map.
388 public String getValueAsString(String... key)
390 if (otherDetails == null)
394 Object value = otherDetails.get(key[0]);
395 if (key.length > 1 && value instanceof Map<?, ?>)
397 value = ((Map) value).get(key[1]);
399 return value == null ? null : value.toString();
403 * Returns a property value for the given key if known, else the specified
407 * @param defaultValue
410 public Object getValue(String key, Object defaultValue)
412 Object value = getValue(key);
413 return value == null ? defaultValue : value;
417 * Used for setting values which are not in the basic set. eg STRAND, FRAME
425 public void setValue(String key, Object value)
429 if (otherDetails == null)
432 * LinkedHashMap preserves insertion order of attributes
434 otherDetails = new LinkedHashMap<>();
437 otherDetails.put(key, value);
438 recordAttribute(key, value);
443 * Notifies the addition of a feature attribute. This lets us keep track of
444 * which attributes are present on each feature type, and also the range of
445 * numerical-valued attributes.
450 protected void recordAttribute(String key, Object value)
452 String attDesc = null;
455 attDesc = FeatureSources.getInstance().getSource(source)
456 .getAttributeName(key);
459 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
464 * The following methods are added to maintain the castor Uniprot mapping file
467 public void setStatus(String status)
469 setValue(STATUS, status);
472 public String getStatus()
474 return (String) getValue(STATUS);
478 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
479 * GFF), and 0 for unknown or not (validly) specified
483 public int getStrand()
486 if (otherDetails != null)
488 Object str = otherDetails.get(STRAND);
493 else if ("+".equals(str))
502 * Set the value of strand
505 * should be "+" for forward, or "-" for reverse
507 public void setStrand(String strand)
509 setValue(STRAND, strand);
512 public void setPhase(String phase)
514 setValue(PHASE, phase);
517 public String getPhase()
519 return (String) getValue(PHASE);
523 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
527 public void setEnaLocation(String loc)
529 setValue(LOCATION, loc);
533 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
537 public String getEnaLocation()
539 return (String) getValue(LOCATION);
543 * Readable representation, for debug only, not guaranteed not to change
547 public String toString()
549 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
554 * Overridden to ensure that whenever two objects are equal, they have the
558 public int hashCode()
560 String s = getType() + getDescription() + getFeatureGroup()
561 + getValue("ID") + getValue("Name") + getValue("Parent")
563 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
568 * Answers true if the feature's start/end values represent two related
569 * positions, rather than ends of a range. Such features may be visualised or
570 * reported differently to features on a range.
573 public boolean isContactFeature()
575 return contactFeature;
579 * Answers true if the sequence has zero start and end position
583 public boolean isNonPositional()
585 return begin == 0 && end == 0;
589 * Answers an html-formatted report of feature details. If parameter
590 * {@code mf} is not null, the feature is a virtual linked feature, and
591 * details included both the original location and the mapped location
599 public String getDetailsReport(String seqName, MappedFeatures mf)
601 FeatureSourceI metadata = FeatureSources.getInstance()
604 StringBuilder sb = new StringBuilder(128);
606 sb.append("<table>");
607 String name = mf == null ? seqName : mf.getLinkedSequenceName();
608 sb.append(String.format(ROW_DATA, "Location", name, begin == end ? begin
609 : begin + (isContactFeature() ? ":" : "-") + end));
611 String consequence = "";
614 int[] localRange = mf.getMappedPositions(begin, end);
615 int from = localRange[0];
616 int to = localRange[localRange.length - 1];
617 String s = mf.isFromCds() ? "Peptide Location" : "Coding location";
618 sb.append(String.format(ROW_DATA, s, seqName, from == to ? from
619 : from + (isContactFeature() ? ":" : "-") + to));
622 consequence = mf.findProteinVariants(this);
625 sb.append(String.format(ROW_DATA, "Type", type, ""));
626 String desc = StringUtils.stripHtmlTags(description);
627 sb.append(String.format(ROW_DATA, "Description", desc, ""));
628 if (!Float.isNaN(score) && score != 0f)
630 sb.append(String.format(ROW_DATA, "Score", score, ""));
632 if (featureGroup != null)
634 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
637 if (!consequence.isEmpty())
639 sb.append(String.format(ROW_DATA, "Consequence",
640 "<i>Translated by Jalview</i>", consequence));
643 if (otherDetails != null)
645 TreeMap<String, Object> ordered = new TreeMap<>(
646 String.CASE_INSENSITIVE_ORDER);
647 ordered.putAll(otherDetails);
649 for (Entry<String, Object> entry : ordered.entrySet())
651 String key = entry.getKey();
653 Object value = entry.getValue();
654 if (value instanceof Map<?, ?>)
657 * expand values in a Map attribute across separate lines
658 * copy to a TreeMap for alphabetical ordering
660 Map<String, Object> values = (Map<String, Object>) value;
661 SortedMap<String, Object> sm = new TreeMap<>(
662 String.CASE_INSENSITIVE_ORDER);
664 for (Entry<?, ?> e : sm.entrySet())
666 sb.append(String.format(ROW_DATA, key, e.getKey().toString(),
667 e.getValue().toString()));
672 // tried <td title="key"> but it failed to provide a tooltip :-(
673 String attDesc = null;
674 if (metadata != null)
676 attDesc = metadata.getAttributeName(key);
678 String s = entry.getValue().toString();
679 if (isValueInteresting(key, s, metadata))
681 sb.append(String.format(ROW_DATA, key,
682 attDesc == null ? "" : attDesc, s));
687 sb.append("</table>");
689 String text = sb.toString();
694 * Answers true if we judge the value is worth displaying, by some heuristic
702 boolean isValueInteresting(String key, String value,
703 FeatureSourceI metadata)
706 * currently suppressing zero values as well as null or empty
708 if (value == null || "".equals(value) || ".".equals(value)
709 || "0".equals(value))
714 if (metadata == null)
719 FeatureAttributeType attType = metadata.getAttributeType(key);
720 if (attType != null && (attType == FeatureAttributeType.Float
721 || attType.equals(FeatureAttributeType.Integer)))
725 float fval = Float.valueOf(value);
730 } catch (NumberFormatException e)
736 return true; // default to interesting
740 * Sets the feature source identifier
744 public void setSource(String theSource)
750 class SFSortByEnd implements Comparator<SequenceFeature>
753 public int compare(SequenceFeature a, SequenceFeature b)
755 return a.getEnd() - b.getEnd();
759 class SFSortByBegin implements Comparator<SequenceFeature>
762 public int compare(SequenceFeature a, SequenceFeature b)
764 return a.getBegin() - b.getBegin();