2 * Jalview - A Sequence Alignment Editor and Viewer ($$Version-Rel$$)
3 * Copyright (C) $$Year-Rel$$ The Jalview Authors
5 * This file is part of Jalview.
7 * Jalview is free software: you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation, either version 3
10 * of the License, or (at your option) any later version.
12 * Jalview is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty
14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>.
19 * The Jalview Authors are detailed in the 'AUTHORS' file.
21 package jalview.datamodel;
23 import java.util.Comparator;
24 import java.util.LinkedHashMap;
26 import java.util.Map.Entry;
27 import java.util.SortedMap;
28 import java.util.TreeMap;
29 import java.util.Vector;
31 import jalview.datamodel.features.FeatureAttributeType;
32 import jalview.datamodel.features.FeatureAttributes;
33 import jalview.datamodel.features.FeatureLocationI;
34 import jalview.datamodel.features.FeatureSourceI;
35 import jalview.datamodel.features.FeatureSources;
36 import jalview.util.StringUtils;
39 * A class that models a single contiguous feature on a sequence. If flag
40 * 'contactFeature' is true, the start and end positions are interpreted instead
41 * as two contact points.
43 public class SequenceFeature implements FeatureLocationI
46 * score value if none is set; preferably Float.NaN, but see
47 * JAL-2060 and JAL-2554 for a couple of blockers to that
49 private static final float NO_SCORE = 0f;
51 private static final String STATUS = "status";
53 public static final String STRAND = "STRAND";
55 // key for Phase designed not to conflict with real GFF data
56 public static final String PHASE = "!Phase";
58 // private key for ENA location designed not to conflict with real GFF data
59 private static final String LOCATION = "!Location";
61 private static final String ROW_DATA = "<tr><td>%s</td><td>%s</td><td>%s</td></tr>";
64 * type, begin, end, featureGroup, score and contactFeature are final
65 * to ensure that the integrity of SequenceFeatures data store
66 * can't be broken by direct update of these fields
68 public final String type;
70 public final int begin;
74 public final String featureGroup;
76 public final float score;
78 private final boolean contactFeature;
80 public String description;
83 * a map of key-value pairs; may be populated from GFF 'column 9' data,
84 * other data sources (e.g. GenBank file), or programmatically
86 public Map<String, Object> otherDetails;
88 public Vector<String> links;
91 * the identifier (if known) for the FeatureSource held in FeatureSources,
92 * as a provider of metadata about feature attributes
94 private String source;
/**
 * Constructs a duplicate of the given feature, keeping its begin, end,
 * feature group and score. Note: this makes a shallow copy of the
 * otherDetails map, so the new and original SequenceFeature may reference the
 * same objects in the map.
 * 
 * @param cpy
 *          the feature to duplicate
 */
public SequenceFeature(SequenceFeature cpy)
{
  this(cpy,
          cpy.getBegin(),
          cpy.getEnd(),
          cpy.getFeatureGroup(),
          cpy.getScore());
}
/**
 * Constructs a feature with no score set; the score defaults to
 * {@code NO_SCORE}.
 * 
 * @param theType
 *          the feature type
 * @param theDesc
 *          the feature description
 * @param theBegin
 *          the start position
 * @param theEnd
 *          the end position
 * @param group
 *          the feature group
 */
public SequenceFeature(String theType, String theDesc, int theBegin,
        int theEnd, String group)
{
  // delegate to the scored constructor with the 'no score' placeholder
  this(theType,
          theDesc,
          theBegin,
          theEnd,
          NO_SCORE,
          group);
}
125 * Constructor including a score value
134 public SequenceFeature(String theType, String theDesc, int theBegin,
135 int theEnd, float theScore, String group)
138 this.description = theDesc;
139 this.begin = theBegin;
141 this.featureGroup = group;
142 this.score = theScore;
145 * for now, only "Disulfide/disulphide bond" is treated as a contact feature
147 this.contactFeature = "disulfide bond".equalsIgnoreCase(type)
148 || "disulphide bond".equalsIgnoreCase(type);
152 * A copy constructor that allows the value of final fields to be 'modified'
161 public SequenceFeature(SequenceFeature sf, String newType, int newBegin,
162 int newEnd, String newGroup, float newScore)
164 this(newType, sf.getDescription(), newBegin, newEnd, newScore,
167 this.source = sf.source;
169 if (sf.otherDetails != null)
171 otherDetails = new LinkedHashMap<>();
172 otherDetails.putAll(sf.otherDetails);
174 if (sf.links != null && sf.links.size() > 0)
176 links = new Vector<>();
177 links.addAll(sf.links);
/**
 * A copy constructor that allows the value of final fields to be 'modified',
 * keeping the type of the copied feature.
 * 
 * @param sf
 *          the feature to copy from
 * @param newBegin
 *          the start position of the new feature
 * @param newEnd
 *          the end position of the new feature
 * @param newGroup
 *          the feature group of the new feature
 * @param newScore
 *          the score of the new feature
 */
public SequenceFeature(SequenceFeature sf, int newBegin, int newEnd,
        String newGroup, float newScore)
{
  this(sf,
          sf.getType(),
          newBegin,
          newEnd,
          newGroup,
          newScore);
}
197 * Two features are considered equal if they have the same type, group,
198 * description, start, end, phase, strand, and (if present) 'Name', ID' and
199 * 'Parent' attributes.
201 * Note we need to check Parent to distinguish the same exon occurring in
202 * different transcripts (in Ensembl GFF). This allows assembly of transcript
203 * sequences from their component exon regions.
206 public boolean equals(Object o)
208 return equals(o, false);
212 * Overloaded method allows the equality test to optionally ignore the
213 * 'Parent' attribute of a feature. This supports avoiding adding many
214 * superficially duplicate 'exon' or CDS features to genomic or protein
218 * @param ignoreParent
221 public boolean equals(Object o, boolean ignoreParent)
223 if (o == null || !(o instanceof SequenceFeature))
228 SequenceFeature sf = (SequenceFeature) o;
229 boolean sameScore = Float.isNaN(score) ? Float.isNaN(sf.score)
231 if (begin != sf.begin || end != sf.end || !sameScore)
236 if (getStrand() != sf.getStrand())
241 if (!(type + description + featureGroup + getPhase()).equals(
242 sf.type + sf.description + sf.featureGroup + sf.getPhase()))
246 if (!equalAttribute(getValue("ID"), sf.getValue("ID")))
250 if (!equalAttribute(getValue("Name"), sf.getValue("Name")))
256 if (!equalAttribute(getValue("Parent"), sf.getValue("Parent")))
265 * Returns true if both values are null, are both non-null and equal
271 protected static boolean equalAttribute(Object att1, Object att2)
273 if (att1 == null && att2 == null)
279 return att1.equals(att2);
281 return att2.equals(att1);
287 * @return DOCUMENT ME!
290 public int getBegin()
298 * @return DOCUMENT ME!
309 * @return DOCUMENT ME!
311 public String getType()
319 * @return DOCUMENT ME!
321 public String getDescription()
326 public void setDescription(String desc)
331 public String getFeatureGroup()
337 * Adds a hyperlink for the feature. This should have the format label|url.
341 public void addLink(String labelLink)
345 links = new Vector<>();
348 if (!links.contains(labelLink))
350 links.insertElementAt(labelLink, 0);
354 public float getScore()
360 * Used for getting values which are not in the basic set. eg STRAND, PHASE
366 public Object getValue(String key)
368 if (otherDetails == null)
374 return otherDetails.get(key);
379 * Answers the value of the specified attribute as string, or null if no such
380 * value. If more than one attribute name is provided, tries to resolve as keys
381 * to nested maps. For example, if attribute "CSQ" holds a map of key-value
382 * pairs, then getValueAsString("CSQ", "Allele") returns the value of "Allele"
388 public String getValueAsString(String... key)
390 if (otherDetails == null)
394 Object value = otherDetails.get(key[0]);
395 if (key.length > 1 && value instanceof Map<?, ?>)
397 value = ((Map) value).get(key[1]);
399 return value == null ? null : value.toString();
403 * Returns a property value for the given key if known, else the specified
407 * @param defaultValue
410 public Object getValue(String key, Object defaultValue)
412 Object value = getValue(key);
413 return value == null ? defaultValue : value;
417 * Used for setting values which are not in the basic set. eg STRAND, FRAME
425 public void setValue(String key, Object value)
429 if (otherDetails == null)
432 * LinkedHashMap preserves insertion order of attributes
434 otherDetails = new LinkedHashMap<>();
437 otherDetails.put(key, value);
438 recordAttribute(key, value);
443 * Notifies the addition of a feature attribute. This lets us keep track of
444 * which attributes are present on each feature type, and also the range of
445 * numerical-valued attributes.
450 protected void recordAttribute(String key, Object value)
452 String attDesc = null;
455 attDesc = FeatureSources.getInstance().getSource(source)
456 .getAttributeName(key);
459 FeatureAttributes.getInstance().addAttribute(this.type, attDesc, value,
464 * The following methods are added to maintain the castor Uniprot mapping file
467 public void setStatus(String status)
469 setValue(STATUS, status);
472 public String getStatus()
474 return (String) getValue(STATUS);
478 * Return 1 for forward strand ('+' in GFF), -1 for reverse strand ('-' in
479 * GFF), and 0 for unknown or not (validly) specified
483 public int getStrand()
486 if (otherDetails != null)
488 Object str = otherDetails.get(STRAND);
493 else if ("+".equals(str))
502 * Set the value of strand
505 * should be "+" for forward, or "-" for reverse
507 public void setStrand(String strand)
509 setValue(STRAND, strand);
512 public void setPhase(String phase)
514 setValue(PHASE, phase);
517 public String getPhase()
519 return (String) getValue(PHASE);
523 * Sets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
527 public void setEnaLocation(String loc)
529 setValue(LOCATION, loc);
533 * Gets the 'raw' ENA format location specifier e.g. join(12..45,89..121)
537 public String getEnaLocation()
539 return (String) getValue(LOCATION);
543 * Readable representation, for debug only, not guaranteed not to change
547 public String toString()
549 return String.format("%d %d %s %s", getBegin(), getEnd(), getType(),
554 * Overridden to ensure that whenever two objects are equal, they have the
558 public int hashCode()
560 String s = getType() + getDescription() + getFeatureGroup()
561 + getValue("ID") + getValue("Name") + getValue("Parent")
563 return s.hashCode() + getBegin() + getEnd() + (int) getScore()
568 * Answers true if the feature's start/end values represent two related
569 * positions, rather than ends of a range. Such features may be visualised or
570 * reported differently to features on a range.
573 public boolean isContactFeature()
575 return contactFeature;
579 * Answers true if the sequence has zero start and end position
583 public boolean isNonPositional()
585 return begin == 0 && end == 0;
589 * Answers an html-formatted report of feature details. If parameter
590 * {@code mf} is not null, the feature is a virtual linked feature, and
591 * details include both the original location and the mapped location
599 public String getDetailsReport(String seqName, MappedFeatures mf)
601 FeatureSourceI metadata = FeatureSources.getInstance()
604 StringBuilder sb = new StringBuilder(128);
606 sb.append("<table>");
607 String name = mf == null ? seqName : mf.getLinkedSequenceName();
608 sb.append(String.format(ROW_DATA, "Location", name,
610 : begin + (isContactFeature() ? ":" : "-") + end));
612 String consequence = "";
615 int[] localRange = mf.getMappedPositions(begin, end);
616 int from = localRange[0];
617 int to = localRange[localRange.length - 1];
618 String s = mf.isFromCds() ? "Peptide Location" : "Coding location";
619 sb.append(String.format(ROW_DATA, s, seqName, from == to ? from
620 : from + (isContactFeature() ? ":" : "-") + to));
623 consequence = mf.findProteinVariants(this);
626 sb.append(String.format(ROW_DATA, "Type", type, ""));
627 String desc = StringUtils.stripHtmlTags(description);
628 sb.append(String.format(ROW_DATA, "Description", desc, ""));
629 if (!Float.isNaN(score) && score != 0f)
631 sb.append(String.format(ROW_DATA, "Score", score, ""));
633 if (featureGroup != null)
635 sb.append(String.format(ROW_DATA, "Group", featureGroup, ""));
638 if (!consequence.isEmpty())
640 sb.append(String.format(ROW_DATA, "Consequence",
641 "<i>Translated by Jalview</i>", consequence));
644 if (otherDetails != null)
646 TreeMap<String, Object> ordered = new TreeMap<>(
647 String.CASE_INSENSITIVE_ORDER);
648 ordered.putAll(otherDetails);
650 for (Entry<String, Object> entry : ordered.entrySet())
652 String key = entry.getKey();
654 Object value = entry.getValue();
655 if (value instanceof Map<?, ?>)
658 * expand values in a Map attribute across separate lines
659 * copy to a TreeMap for alphabetical ordering
661 Map<String, Object> values = (Map<String, Object>) value;
662 SortedMap<String, Object> sm = new TreeMap<>(
663 String.CASE_INSENSITIVE_ORDER);
665 for (Entry<?, ?> e : sm.entrySet())
667 sb.append(String.format(ROW_DATA, key, e.getKey().toString(), e
668 .getValue().toString()));
673 // tried <td title="key"> but it failed to provide a tooltip :-(
674 String attDesc = null;
675 if (metadata != null)
677 attDesc = metadata.getAttributeName(key);
679 String s = entry.getValue().toString();
680 if (isValueInteresting(key, s, metadata))
682 sb.append(String.format(ROW_DATA, key, attDesc == null ? ""
688 sb.append("</table>");
690 String text = sb.toString();
695 * Answers true if we judge the value is worth displaying, by some heuristic
703 boolean isValueInteresting(String key, String value,
704 FeatureSourceI metadata)
707 * currently suppressing zero values as well as null or empty
709 if (value == null || "".equals(value) || ".".equals(value)
710 || "0".equals(value))
715 if (metadata == null)
720 FeatureAttributeType attType = metadata.getAttributeType(key);
722 && (attType == FeatureAttributeType.Float || attType
723 .equals(FeatureAttributeType.Integer)))
727 float fval = Float.valueOf(value);
732 } catch (NumberFormatException e)
738 return true; // default to interesting
742 * Sets the feature source identifier
746 public void setSource(String theSource)
752 class SFSortByEnd implements Comparator<SequenceFeature>
755 public int compare(SequenceFeature a, SequenceFeature b)
757 return a.getEnd() - b.getEnd();
761 class SFSortByBegin implements Comparator<SequenceFeature>
764 public int compare(SequenceFeature a, SequenceFeature b)
766 return a.getBegin() - b.getBegin();